Analyze AFM tip solvation

This notebook demonstrates deposition of an SDS adsorption layer on a non-spherical AFM tip model.

Initialization

IPython magic

In [270]:
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [14]:
%aimport
Modules to reload:
all-except-skipped

Modules to skip:

Imports

In [271]:
import ase.io # here used for reading pdb files
from ase.visualize import view
from ase.visualize.plot import plot_atoms # has nasty offset issues
from cycler import cycler # here used for cycling through colors in plots
import datetime
import fabric # for pythonic ssh connections
from fireworks import LaunchPad, Firework, Tracker, Workflow 
from fireworks import FileTransferTask, PyTask, ScriptTask

# FireWorks functionality 
from fireworks import Firework, LaunchPad, ScriptTask, Workflow
from fireworks.user_objects.firetasks.templatewriter_task import TemplateWriterTask
from fireworks.user_objects.firetasks.filepad_tasks import AddFilesTask, GetFilesTask, GetFilesByQueryTask
from imteksimfw.fireworks.user_objects.firetasks.cmd_tasks import CmdTask
from fireworks.utilities.filepad import FilePad # direct FilePad access, similar to the familiar LaunchPad

from collections.abc import Iterable
import glob
import gc # manually clean up memory with gc.collect()
import gromacs # GromacsWrapper, here used for evoking gmc commands, reading and writing .ndx files
# from io import StringIO, TextIOWrapper
import io
from IPython.display import display, Image #, Video # display image files within notebook
from ipywidgets import Video  # display video within notebook
import itertools # for products of iterables
import json # generic serialization of lists and dicts
import jinja2 # here used for filling packmol input script template
import jinja2.meta # for gathering variables in a jinja2 template
import logging 
import matplotlib.pyplot as plt
import MDAnalysis as mda # here used for reading and analyzing gromacs trajectories
import MDAnalysis.analysis.rdf as mda_rdf
import MDAnalysis.analysis.rms as mda_rms
from mpl_toolkits.mplot3d import Axes3D # here used for 3d point cloud scatter plot
import miniball # finds minimum bounding sphere of a point set
import nglview
import numpy as np
import os, os.path
import pandas as pd
import panedr # reads GROMACS edr into pandas df, requires pandas and pbr
import parmed as pmd # has quite a few advantages over ASE when it comes to parsing pdb
from pprint import pprint
import pymongo # for sorting in queries
import scipy.constants as sc
import subprocess # used for evoking external packmol
import sys
import tempfile
import yaml

GromacsWrapper might need a file ~/.gromacswrapper.cfg with content

[Gromacs]
tools = gmx gmx_d 
# gmx_mpi_d gmx_mpi_d

# name of the logfile that is written to the current directory
logfilename = gromacs.log

# loglevels (see Python's logging module for details)
#   ERROR   only fatal errors
#   WARN    only warnings
#   INFO    interesting messages
#   DEBUG   everything

# console messages written to screen
loglevel_console = INFO

# file messages written to logfilename
loglevel_file = DEBUG

in order to know the GROMACS executables it is allowed to use. Otherwise, calls to gmx_mpi or gmx_mpi_d without MPI wrapper might lead to MPI warnings in output that cause GromacsWrapper to fail.

Logging

In [272]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

ParmEd needs to know the GROMACS topology folder, usually get this from envionment variable GMXLIB:

Function definitions

In [273]:
def find_undeclared_variables(infile):
    """identify all variables evaluated in a jinja 2 template file"""
    env = jinja2.Environment()
    with open(infile) as template_file:
        parsed = env.parse(template_file.read())

    undefined = jinja2.meta.find_undeclared_variables(parsed)
    return undefined
In [274]:
def memuse():
    """Quick overview on memory usage of objects in Jupyter notebook"""
    # https://stackoverflow.com/questions/40993626/list-memory-usage-in-ipython-and-jupyter
    # These are the usual ipython objects, including this one you are creating
    ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

    # Get a sorted list of the objects and their sizes
    return sorted([(x, sys.getsizeof(globals().get(x))) for x in dir(sys.modules['__main__']) if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

Global settings

In [19]:
# pandas settings
pd.options.display.max_rows = 200
pd.options.display.max_columns = 16
pd.options.display.max_colwidth = 256
In [275]:
os.environ['GMXLIB'] = '/gmx_top'
In [276]:
# pmd.gromacs.GROMACS_TOPDIR = os.environ['GMXLIB']
pmd.gromacs.GROMACS_TOPDIR = '/gmx_top'
In [277]:
# prefix = '/mnt/dat/work/testuser/indenter/sandbox/20191110_packmol'
prefix = '/mnt/dat/work'
In [278]:
work_prefix = '/mnt/dat/work/tmp'
In [279]:
try:
    os.mkdir(work_prefix)
except FileExistsError as exc:
    print(exc)
[Errno 17] File exists: '/mnt/dat/work/tmp'
In [280]:
os.chdir(work_prefix)
In [281]:
# the FireWorks LaunchPad
lp = LaunchPad.auto_load() #Define the server and database
# FilePad behaves analogous to LaunchPad
fp = FilePad.auto_load()

Conversion from LAMMPS data format to PDB

The following bash / tcl snippet converts a LAMMPS data file to PDB, assigning the desired names as mapped in a yaml file

#!/bin/bash
# echo "package require jlhvmd; jlh lmp2pdb indenter.lammps indenter.pdb" | vmd -eofexit
vmd -eofexit << 'EOF'
package require jlhvmd
topo readlammpsdata indenter.lammps
jlh type2name SDS_type2name.yaml
jlh name2res  SDS_name2res.yaml
set sel [atomselect top all]
$sel writepdb indenter.pdb
EOF

pdb_chain.py indenter.pdb > indenter_wo_chainid.pdb
pdb_reres_by_atom_9999.py indenter_wo_chainid.pdb > indenter_reres.pdb

Requires

Overview

Overview on projects in database

In [27]:
query = {'metadata.datetime': {'$gt': '2020'} }
In [28]:
fp.filepad.count_documents(query)
Out[28]:
8937
In [29]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 'project': '$metadata.project' },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # pull 'project' field up in hierarchy
        "$addFields": { 
            "project": "$_id.project",
        },
    },
    {  # drop nested '_id.project'
        "$project": { 
            "_id": False 
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

# sort_aggregation
#aggregation_pipeline = [ match_aggregation, group_aggregation, set_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [c for c in cursor]
res_df = pd.DataFrame(data=res) # pandas Dataframe is just nice for printing in notebook
In [30]:
res_df
Out[30]:
object_count earliest latest project
0 1653 2020-07-21 01:13:12.200383 2020-07-21 01:13:13.756743 2020-07-21-passivation-trial
1 278 2020-07-20 22:19:46.798328 2020-07-20 22:19:48.273066 2020-07-20-passivation-trial-d
2 316 2020-07-20 16:28:44.921863 2020-07-20 16:28:47.699103 2020-07-20-passivation-trial-c
3 278 2020-07-20 12:31:37.328725 2020-07-20 12:31:41.060445 2020-07-20-passivation-trial-b
4 278 2020-07-20 10:48:46.162767 2020-07-20 10:48:47.754857 2020-07-20-passivation-trial
5 1674 2020-07-19 22:52:59.921460 2020-07-19 22:53:01.334582 2020-07-19-passivation
6 278 2020-07-19 00:39:08.874821 2020-07-19 00:39:09.172045 2020-07-19-passivation-trial-a
7 278 2020-07-19 00:24:15.492750 2020-07-19 00:24:15.791253 2020-07-18-passivation-trial-c
8 152 2020-07-18 22:33:35.817622 2020-07-18 22:33:36.072923 2020-07-18-passivation-trial-b
9 278 2020-07-18 17:24:37.304667 2020-07-18 17:24:37.643817 2020-07-18-passivation-trial
10 278 2020-07-18 15:13:57.808691 2020-07-18 15:13:58.077915 2020-08-18-passivation-trial
11 121 2020-07-17 21:44:13.197047 2020-07-17 21:44:13.508164 2020-07-17-passivation-trial
12 1386 2020-07-09 01:30:15.276530 2020-07-09 01:30:16.819138 2020-07-09-passivation
13 350 2020-07-08 13:31:27.106684 2020-07-08 14:02:41.994152 2020-07-08-passiv-trial-revisited
14 236 2020-07-02 19:37:19.035754 2020-07-02 19:37:19.770123 2020-07-03-passiv-trial-revisited
15 54 2020-07-02 03:49:00.103034 2020-07-02 03:49:00.325450 2020-07-01-passiv-trial-revisited
16 6 2020-05-11 15:34:17.973141 2020-05-11 15:34:18.054752 2020-05-11-2nd-passiv-trial
17 64 2020-05-11 00:46:35.427907 2020-05-11 00:46:35.637375 2020-05-11-passiv-trial
18 76 2020-05-09 23:23:58.405636 2020-05-09 23:23:58.629348 2020-05-10-passiv-trial
19 54 2020-05-09 18:06:29.244729 2020-05-09 18:06:29.470488 2020-05-09-parametric
20 276 2020-05-08 17:40:43.151560 2020-05-08 20:09:19.785964 2020-05-08-final
21 8 2020-05-08 16:46:54.125267 2020-05-08 16:46:55.061579 2020-05-08-dtool-trial
22 102 2020-05-06 19:57:34.090873 2020-05-06 23:02:33.696573 2020-05-06-indenter-passivation-trial
23 4 2020-05-04 19:56:27.458671 2020-05-04 19:56:27.458686 2020-05-04-gmx-em-dtool-trial
24 1 2020-05-04 17:47:46.398832 2020-05-04 17:47:46.398832 2020-04-29-gmx-nvt-trial
25 15 2020-04-29 20:03:40.694070 2020-04-29 21:43:41.499280 2020-04-23-gmx-nvt-trial
26 102 2020-04-23 00:42:50.738462 2020-04-23 12:29:04.927501 2020-04-23-indenter-passivation-trial
27 6 2020-04-22 23:52:50.724373 2020-04-22 23:52:50.724774 2020-04-22-gmx-nvt-trial
28 30 2020-04-22 20:12:52.975107 2020-04-22 20:12:52.992097 2020-04-22-intermediate-trial
29 12 2020-04-22 15:35:33.694291 2020-04-22 19:37:27.812809 2020-04-22-trajectory-rendering-trial
30 23 2020-04-22 00:22:22.239748 2020-04-22 00:22:22.241080 2020-04-21-gmx-chain-wf-trial
31 2 2020-04-21 21:08:51.961011 2020-04-21 21:08:51.961024 2020-04-21-gmx-solvate-trial
32 42 2020-04-21 17:33:16.919815 2020-04-21 23:19:49.893920 2020-04-21-intermediate-trial
33 7 2020-04-21 15:35:02.223005 2020-04-21 15:35:02.223026 2020-04-21-gmx-pull-trial
34 15 2020-04-15 12:12:58.569894 2020-04-21 14:39:06.617272 2020-04-15-gmx-pull-prep-trial
35 74 2020-04-15 01:58:21.918222 2020-04-21 13:38:43.462681 2020-04-15-intermediate-trial
36 4 2020-04-15 00:28:04.783910 2020-04-15 00:28:04.783995 2020-04-15-gmx-em-trial
37 3 2020-04-14 21:19:31.298223 2020-04-14 21:19:31.298232 2020-04-14-gmx-prep-trial
38 2 2020-04-14 17:14:15.144625 2020-04-14 20:23:20.587799 2020-04-14-packmol-trial
39 2 2020-04-03 01:25:09.142195 2020-04-07 16:18:42.232568 2020-04-02-surfactant-molecule-measures-trial
40 5 2020-04-02 21:18:13.804918 2020-04-12 20:56:30.319559 2020-03-31-local-trial
41 39 2020-03-25 12:11:11.906332 2020-03-25 22:47:26.604903 juwels-pull-2020-03-17
42 8 2020-03-13 12:33:31.922163 2020-03-13 12:33:31.922497 juwels-afm-probe-solvation-trial-a-2020-03-13
43 10 2020-03-12 12:31:29.329732 2020-03-12 15:55:54.013161 juwels-gromacs-em-2020-03-12
44 42 2020-03-11 18:45:59.775161 2020-03-12 00:15:43.047626 juwels-gromacs-prep-2020-03-11
45 15 2020-03-09 18:13:09.455387 2020-03-09 23:30:45.006483 juwels-packmol-2020-03-09

Overview on steps in project

In [31]:
project_id = '2020-07-21-passivation-trial'
In [32]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
}
In [33]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[33]:
1666
In [34]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [35]:
res_df
Out[35]:
step earliest latest object_count
0 GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.403514 2020-07-21 01:13:13.756743 252
1 GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.391877 2020-07-21 01:13:13.743321 252
2 GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.378821 2020-07-21 01:13:13.729390 267
3 GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.365516 2020-07-21 01:13:13.714240 240
4 GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.353308 2020-07-21 01:13:13.700739 24
5 GromacsPull:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.352999 2020-07-21 01:13:13.700579 276
6 GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.340208 2020-07-21 01:13:13.686957 48
7 GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.339847 2020-07-21 01:13:13.686626 228
8 GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.327761 2020-07-21 01:13:13.671310 36
9 SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.327455 2020-07-21 01:13:13.670757 24
10 PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.288514 2020-07-21 01:13:12.288523 2
11 SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.254018 2020-07-21 01:13:12.254026 2
12 IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad 2020-07-21 01:13:12.200383 2020-07-21 01:13:12.200392 2
13 GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles None None 1
14 IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles None None 1
15 GromacsRelaxation:GromacsRelaxationMain:push_infiles None None 1
16 SphericalSurfactantPacking:push_infiles None None 2
17 GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles None None 1
18 SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles None None 1
19 GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles None None 1
20 GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles None None 2
21 GromacsPullPrep:GromacsPullPrepMain:push_infiles None None 2
22 GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles None None 1
In [36]:
res_df['step'].values
Out[36]:
array(['GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',
       ' GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad',
       'SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad',
       'PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad',
       'SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad',
       'IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad',
       'GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles',
       'IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles',
       'GromacsRelaxation:GromacsRelaxationMain:push_infiles',
       'SphericalSurfactantPacking:push_infiles',
       'GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles',
       'SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles',
       'GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles',
       'GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles',
       'GromacsPullPrep:GromacsPullPrepMain:push_infiles',
       'GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles'],
      dtype=object)

Overview on objects in project

In [37]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
}
In [38]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[38]:
1666
In [39]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'step', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [40]:
res_df
Out[40]:
type step name earliest latest object_count
0 mp4_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403569 2020-07-21 01:13:13.756743 12
1 surfactant_tail_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403567 2020-07-21 01:13:13.756741 12
2 surfactant_head_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403564 2020-07-21 01:13:13.756738 12
3 substrate_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403561 2020-07-21 01:13:13.756736 12
4 counterion_rmsd GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403559 2020-07-21 01:13:13.756734 12
5 surfactant_tail_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403556 2020-07-21 01:13:13.756731 12
6 surfactant_head_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403554 2020-07-21 01:13:13.756729 12
7 surfactant_head_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403551 2020-07-21 01:13:13.756727 12
8 substrate_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403549 2020-07-21 01:13:13.756725 12
9 substrate_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403546 2020-07-21 01:13:13.756722 12
10 substrate_substrate_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403544 2020-07-21 01:13:13.756720 12
11 counterion_surfactant_tail_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403541 2020-07-21 01:13:13.756718 12
12 counterion_surfactant_head_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403539 2020-07-21 01:13:13.756716 12
13 counterion_substrate_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403536 2020-07-21 01:13:13.756713 12
14 counterion_counterion_rdf GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403534 2020-07-21 01:13:13.756711 12
15 index_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403531 2020-07-21 01:13:13.756709 12
16 topology_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403529 2020-07-21 01:13:13.756706 12
17 data_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403526 2020-07-21 01:13:13.756704 12
18 trajectory_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403524 2020-07-21 01:13:13.756702 12
19 energy_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403521 2020-07-21 01:13:13.756699 12
20 log_file GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.403514 2020-07-21 01:13:13.756693 12
21 mp4_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391930 2020-07-21 01:13:13.743321 12
22 surfactant_tail_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391927 2020-07-21 01:13:13.743318 12
23 surfactant_head_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391925 2020-07-21 01:13:13.743316 12
24 substrate_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391922 2020-07-21 01:13:13.743313 12
25 counterion_rmsd GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391920 2020-07-21 01:13:13.743311 12
26 surfactant_tail_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391917 2020-07-21 01:13:13.743309 12
27 surfactant_head_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391915 2020-07-21 01:13:13.743306 12
28 surfactant_head_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391912 2020-07-21 01:13:13.743304 12
29 substrate_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391910 2020-07-21 01:13:13.743302 12
30 substrate_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391908 2020-07-21 01:13:13.743299 12
31 substrate_substrate_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391905 2020-07-21 01:13:13.743297 12
32 counterion_surfactant_tail_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391903 2020-07-21 01:13:13.743295 12
33 counterion_surfactant_head_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391900 2020-07-21 01:13:13.743292 12
34 counterion_substrate_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391898 2020-07-21 01:13:13.743290 12
35 counterion_counterion_rdf GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391896 2020-07-21 01:13:13.743288 12
36 index_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391893 2020-07-21 01:13:13.743285 12
37 topology_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391891 2020-07-21 01:13:13.743283 12
38 data_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391888 2020-07-21 01:13:13.743280 12
39 trajectory_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391886 2020-07-21 01:13:13.743278 12
40 energy_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391883 2020-07-21 01:13:13.743275 12
41 log_file GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.391877 2020-07-21 01:13:13.743269 12
42 mp4_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378962 2020-07-21 01:13:13.729390 12
43 surfactant_tail_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378955 2020-07-21 01:13:13.729388 12
44 surfactant_head_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378948 2020-07-21 01:13:13.729385 12
45 substrate_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378942 2020-07-21 01:13:13.729383 12
46 counterion_rmsd GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378935 2020-07-21 01:13:13.729380 12
47 surfactant_tail_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378929 2020-07-21 01:13:13.729378 12
48 surfactant_head_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378922 2020-07-21 01:13:13.729376 13
49 surfactant_head_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378915 2020-07-21 01:13:13.729373 13
50 substrate_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378909 2020-07-21 01:13:13.729371 13
51 substrate_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378902 2020-07-21 01:13:13.729368 13
52 substrate_substrate_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378895 2020-07-21 01:13:13.729366 13
53 counterion_surfactant_tail_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378889 2020-07-21 01:13:13.729364 13
54 counterion_surfactant_head_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378882 2020-07-21 01:13:13.729361 13
55 counterion_substrate_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378875 2020-07-21 01:13:13.729359 13
56 counterion_counterion_rdf GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378868 2020-07-21 01:13:13.729356 13
57 index_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378861 2020-07-21 01:13:13.729354 13
58 topology_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378854 2020-07-21 01:13:13.729352 13
59 data_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378848 2020-07-21 01:13:13.729349 13
60 trajectory_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378841 2020-07-21 01:13:13.729347 13
61 energy_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378833 2020-07-21 01:13:13.729344 13
62 log_file GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.378821 2020-07-21 01:13:13.729338 13
63 mp4_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365566 2020-07-21 01:13:13.714240 12
64 surfactant_tail_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365564 2020-07-21 01:13:13.714238 12
65 surfactant_head_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365561 2020-07-21 01:13:13.714235 12
66 substrate_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365558 2020-07-21 01:13:13.714233 12
67 counterion_rmsd GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365556 2020-07-21 01:13:13.714230 12
68 surfactant_tail_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365554 2020-07-21 01:13:13.714228 12
69 surfactant_head_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365551 2020-07-21 01:13:13.714225 12
70 surfactant_head_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365549 2020-07-21 01:13:13.714223 12
71 substrate_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365546 2020-07-21 01:13:13.714220 12
72 substrate_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365544 2020-07-21 01:13:13.714218 12
73 substrate_substrate_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365542 2020-07-21 01:13:13.714215 12
74 counterion_surfactant_tail_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365539 2020-07-21 01:13:13.714213 12
75 counterion_surfactant_head_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365537 2020-07-21 01:13:13.714210 12
76 counterion_substrate_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365535 2020-07-21 01:13:13.714208 12
77 counterion_counterion_rdf GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365532 2020-07-21 01:13:13.714205 12
78 topology_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365530 2020-07-21 01:13:13.714203 12
79 data_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365527 2020-07-21 01:13:13.714200 12
80 trajectory_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365525 2020-07-21 01:13:13.714198 12
81 energy_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365522 2020-07-21 01:13:13.714194 12
82 log_file GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.365516 2020-07-21 01:13:13.714189 12
83 topology_file GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353316 2020-07-21 01:13:13.700739 12
84 data_file GromacsSolvate:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353308 2020-07-21 01:13:13.700735 12
85 mp4_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353057 2020-07-21 01:13:13.700579 12
86 surfactant_tail_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353054 2020-07-21 01:13:13.700576 12
87 surfactant_head_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353052 2020-07-21 01:13:13.700574 12
88 substrate_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353049 2020-07-21 01:13:13.700571 12
89 counterion_rmsd GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353047 2020-07-21 01:13:13.700569 12
90 surfactant_tail_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353044 2020-07-21 01:13:13.700567 12
91 surfactant_head_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353042 2020-07-21 01:13:13.700564 12
92 surfactant_head_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353039 2020-07-21 01:13:13.700561 12
93 substrate_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353037 2020-07-21 01:13:13.700559 12
94 substrate_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353034 2020-07-21 01:13:13.700556 12
95 substrate_substrate_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353032 2020-07-21 01:13:13.700554 12
96 counterion_surfactant_tail_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353029 2020-07-21 01:13:13.700551 12
97 counterion_surfactant_head_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353027 2020-07-21 01:13:13.700549 12
98 counterion_substrate_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353025 2020-07-21 01:13:13.700546 12
99 counterion_counterion_rdf GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353022 2020-07-21 01:13:13.700544 12
100 topology_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353020 2020-07-21 01:13:13.700541 12
101 pullx_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353017 2020-07-21 01:13:13.700539 12
102 pullf_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353015 2020-07-21 01:13:13.700536 12
103 data_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353012 2020-07-21 01:13:13.700534 12
104 compressed_trajectory_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353010 2020-07-21 01:13:13.700531 12
105 trajectory_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353007 2020-07-21 01:13:13.700529 12
106 energy_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.353004 2020-07-21 01:13:13.700526 12
107 log_file GromacsPull:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.352999 2020-07-21 01:13:13.700520 12
108 input_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.340217 2020-07-21 01:13:13.686957 12
109 index_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.340215 2020-07-21 01:13:13.686954 12
110 topology_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.340212 2020-07-21 01:13:13.686952 12
111 data_file GromacsPullPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.340208 2020-07-21 01:13:13.686948 12
112 mp4_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339894 2020-07-21 01:13:13.686626 12
113 surfactant_tail_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339892 2020-07-21 01:13:13.686624 12
114 surfactant_head_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339889 2020-07-21 01:13:13.686621 12
115 substrate_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339887 2020-07-21 01:13:13.686619 12
116 counterion_rmsd GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339884 2020-07-21 01:13:13.686617 12
117 surfactant_tail_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339882 2020-07-21 01:13:13.686614 12
118 surfactant_head_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339880 2020-07-21 01:13:13.686612 12
119 surfactant_head_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339877 2020-07-21 01:13:13.686610 12
120 substrate_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339875 2020-07-21 01:13:13.686607 12
121 substrate_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339873 2020-07-21 01:13:13.686605 12
122 substrate_substrate_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339870 2020-07-21 01:13:13.686603 12
123 counterion_surfactant_tail_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339868 2020-07-21 01:13:13.686601 12
124 counterion_surfactant_head_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339865 2020-07-21 01:13:13.686598 12
125 counterion_substrate_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339863 2020-07-21 01:13:13.686596 12
126 counterion_counterion_rdf GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339860 2020-07-21 01:13:13.686593 12
127 data_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339858 2020-07-21 01:13:13.686591 12
128 trajectory_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339855 2020-07-21 01:13:13.686589 12
129 energy_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339852 2020-07-21 01:13:13.686586 12
130 log_file GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.339847 2020-07-21 01:13:13.686580 12
131 restraint_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.327768 2020-07-21 01:13:13.671310 12
132 topology_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.327765 2020-07-21 01:13:13.671303 12
133 data_file GromacsPrep:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.327761 2020-07-21 01:13:13.671294 12
134 png_file SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.327462 2020-07-21 01:13:13.670757 12
135 data_file SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.327455 2020-07-21 01:13:13.670746 12
136 png_file PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.288523 2020-07-21 01:13:12.288523 1
137 indenter_file PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.288514 2020-07-21 01:13:12.288514 1
138 png_file SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.254026 2020-07-21 01:13:12.254026 1
139 indenter_file SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.254018 2020-07-21 01:13:12.254018 1
140 png_file IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.200392 2020-07-21 01:13:12.200392 1
141 indenter_file IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad NaN 2020-07-21 01:13:12.200383 2020-07-21 01:13:12.200383 1
142 input GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles renumber_png.sh None None 1
143 input GromacsEnergyMinimizationAfterSolvation:GromacsEnergyMinimizationAfterSolvationMain:push_infiles em_solvated.mdp None None 1
144 input GromacsEnergyMinimization:GromacsTrajectoryVisualization:push_infiles movie.pml.template None None 1
145 input GromacsEnergyMinimization:GromacsEnergyMinimizationMain:push_infiles em.mdp None None 1
146 input GromacsNVTEquilibration:GromacsNVTEquilibrationMain:push_infiles nvt.mdp None None 1
147 surfactant_file SurfactantMoleculeMeasures:SurfactantMoleculeMeasuresMain:push_infiles 1_SDS.pdb None None 1
148 indenter_file IndenterBoundingSphere:IndenterBoundingSphereMain:push_infiles AU_111_r_25.pdb None None 1
149 template SphericalSurfactantPacking:push_infiles sphere.inp.template None None 1
150 data SphericalSurfactantPacking:push_infiles 1_NA.pdb None None 1
151 input GromacsPullPrep:GromacsPullPrepMain:push_infiles sys.top.template None None 1
152 input GromacsNPTEquilibration:GromacsNPTEquilibrationMain:push_infiles npt.mdp None None 1
153 input GromacsRelaxation:GromacsRelaxationMain:push_infiles relax.mdp None None 1
154 input GromacsPullPrep:GromacsPullPrepMain:push_infiles pull.mdp.template None None 1

Overview on images by distinct steps

In [41]:
query = {
    'metadata.project': project_id,
    'metadata.type': 'png_file',
}
In [42]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[42]:
15
In [43]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [44]:
res_df
Out[44]:
step type name earliest latest object_count
0 SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-21 01:13:12.327462 2020-07-21 01:13:13.670757 12
1 PackingConstraintSpheres:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-21 01:13:12.288523 2020-07-21 01:13:12.288523 1
2 SurfactantMoleculeMeasures:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-21 01:13:12.254026 2020-07-21 01:13:12.254026 1
3 IndenterBoundingSphere:ProcessAnalyzeAndVisualize:push_filepad png_file NaN 2020-07-21 01:13:12.200392 2020-07-21 01:13:12.200392 1
In [45]:
res_df["step"][0]
Out[45]:
'SphericalSurfactantPacking:ProcessAnalyzeAndVisualize:push_filepad'

Packing visualization

Indenter bounding sphere

In [46]:
query = {
    'metadata.project': project_id,
    'metadata.type': 'png_file',
    'metadata.step': {'$regex': 'IndenterBoundingSphere'}
}
In [47]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[47]:
1
In [48]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        obj_list.append(Image(filename=tmp.name)) 
    print('.',end='')
.
In [49]:
for obj in obj_list:
    display(obj)

Surfactant measures

In [50]:
query = {
    'metadata.project': project_id,
    'metadata.type': 'png_file',
    'metadata.step': {'$regex': 'SurfactantMoleculeMeasures'}
}
In [51]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[51]:
1
In [52]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        obj_list.append(Image(filename=tmp.name)) 
    print('.',end='')
.
In [53]:
obj_list[0]
Out[53]:

Packing constraints

In [54]:
query = {
    'metadata.project': project_id,
    'metadata.type': 'png_file',
    'metadata.step': {'$regex': 'PackingConstraintSpheres'}
}
In [55]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[55]:
1
In [56]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        obj_list.append(Image(filename=tmp.name)) 
    print('.',end='')
.
In [57]:
obj_list[0]
Out[57]:

Packed film

In [58]:
query = {
    'metadata.project': project_id,
    'metadata.type': 'png_file',
    'metadata.step': {'$regex': 'SphericalSurfactantPacking'}
}
In [59]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[59]:
12
In [60]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
        tmp.write(content)
        obj_list.append(Image(filename=tmp.name)) 
    print('.',end='')
............
In [61]:
for obj in obj_list:
    display(obj)

Energy minimization analysis

Overview on objects in step

In [62]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad'
}
In [63]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[63]:
228
In [64]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [65]:
res_df
Out[65]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.339894 2020-07-21 01:13:13.686626 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.339892 2020-07-21 01:13:13.686624 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.339889 2020-07-21 01:13:13.686621 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.339887 2020-07-21 01:13:13.686619 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.339884 2020-07-21 01:13:13.686617 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.339882 2020-07-21 01:13:13.686614 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.339880 2020-07-21 01:13:13.686612 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.339877 2020-07-21 01:13:13.686610 12
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.339875 2020-07-21 01:13:13.686607 12
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.339873 2020-07-21 01:13:13.686605 12
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.339870 2020-07-21 01:13:13.686603 12
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.339868 2020-07-21 01:13:13.686601 12
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.339865 2020-07-21 01:13:13.686598 12
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.339863 2020-07-21 01:13:13.686596 12
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.339860 2020-07-21 01:13:13.686593 12
15 data_file NaN 2020-07-21 01:13:12.339858 2020-07-21 01:13:13.686591 12
16 trajectory_file NaN 2020-07-21 01:13:12.339855 2020-07-21 01:13:13.686589 12
17 energy_file NaN 2020-07-21 01:13:12.339852 2020-07-21 01:13:13.686586 12
18 log_file NaN 2020-07-21 01:13:12.339847 2020-07-21 01:13:13.686580 12

Global observables

In [66]:
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',  #{'$regex': 'GromacsEnergyMinimization'}
    "metadata.type": 'energy_file',
}
fp.filepad.count_documents(query)
Out[66]:
12
In [67]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [68]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [69]:
[ c for c in cursor]
Out[69]:
[{'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f164f4d7dc9cfbf44a2996f'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f165f3a7dc9cfbf44a2f7ff'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f1632947dc9cfbf44a1d052'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f1641167dc9cfbf44a22c0a'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f162f907dc9cfbf44a1cbd7'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f162e7b7dc9cfbf44a1c8f4'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f16500a7dc9cfbf44a29a3f'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f162d897dc9cfbf44a1c7e5'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f1655ce7dc9cfbf44a2b729'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f166fca7dc9cfbf44a36b07'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f1674767dc9cfbf44a39ed1'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f167bc17dc9cfbf44a3b977'}]
In [70]:
res_mi_list = []

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        em_df = panedr.edr_to_df(tmp.name)
        
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(),em_df.index],
            names=[*c["_id"].keys(),'step'])
        em_mi_df = em_df.set_index(mi)        
        res_mi_list.append(em_mi_df)
    print('.',end='')
print('')

res_mi_df = pd.concat(res_mi_list)
res_df = res_mi_df.reset_index()
............
In [71]:
res_mi_df
Out[71]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
nmolecules step
44 0.0 0.0 17870.773438 10515.197266 1631.395508 1011.865295 11690.691406 -583627.3750 -18877.916016 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1.0 1.0 17581.853516 10477.472656 1631.442139 1008.043762 11688.764648 -583627.3750 -18878.417969 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2.0 2.0 17238.019531 10432.553711 1631.510254 1003.449646 11686.456055 -583627.2500 -18879.031250 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3.0 3.0 16829.621094 10379.159180 1631.608765 997.923401 11683.688477 -583627.1875 -18879.783203 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4.0 4.0 16345.567383 10315.792969 1631.750732 991.267944 11680.367188 -583627.0625 -18880.710938 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 9995.0 9995.0 138.642334 7118.208008 2281.199951 517.900940 22626.222656 -582644.4375 -55745.808594 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9996.0 9996.0 139.351089 7121.719727 2281.218262 517.639038 22625.919922 -582645.4375 -55747.671875 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9997.0 9997.0 139.446945 7117.706543 2281.231689 517.956787 22626.304688 -582643.6875 -55747.363281 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
9999.0 9999.0 138.082321 7119.891113 2281.240723 517.767578 22626.089844 -582644.5000 -55748.457031 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
10000.0 10000.0 140.254425 7117.438477 2281.290039 518.000366 22626.398438 -582642.5625 -55750.425781 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

93789 rows × 31 columns

In [72]:
res_df
Out[72]:
nmolecules step Time Bond U-B Proper Dih. LJ-14 Coulomb-14 ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
0 44 0.0 0.0 17870.773438 10515.197266 1631.395508 1011.865295 11690.691406 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 44 1.0 1.0 17581.853516 10477.472656 1631.442139 1008.043762 11688.764648 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 44 2.0 2.0 17238.019531 10432.553711 1631.510254 1003.449646 11686.456055 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 44 3.0 3.0 16829.621094 10379.159180 1631.608765 997.923401 11683.688477 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 44 4.0 4.0 16345.567383 10315.792969 1631.750732 991.267944 11680.367188 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93784 88 9995.0 9995.0 138.642334 7118.208008 2281.199951 517.900940 22626.222656 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93785 88 9996.0 9996.0 139.351089 7121.719727 2281.218262 517.639038 22625.919922 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93786 88 9997.0 9997.0 139.446945 7117.706543 2281.231689 517.956787 22626.304688 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93787 88 9999.0 9999.0 138.082321 7119.891113 2281.240723 517.767578 22626.089844 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93788 88 10000.0 10000.0 140.254425 7117.438477 2281.290039 518.000366 22626.398438 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

93789 rows × 33 columns

In [73]:
y_quantities = [
    'Potential',
    'Pressure',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

positions = [
    (0,0),
    (0,1),
    (1,0),
    (2,0),
    (2,1),
]
fig, ax = plt.subplots(3,2,figsize=(10,12))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [74]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [75]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[75]:
12
In [76]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [77]:
res_df
Out[77]:
step type name earliest latest object_count
0 GromacsEnergyMinimization:ProcessAnalyzeAndVisualize:push_filepad mp4_file NaN 2020-07-21 01:13:12.339894 2020-07-21 01:13:13.686626 12
In [78]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        obj_list.append(Video.from_file(tmp.name))
    print('.',end='')
............
In [79]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # print(metadata['metadata'])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [80]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
(525,)
(481,)
(438,)
(394,)
(350,)
(306,)
(263,)
(219,)
(175,)
(131,)
(88,)
(44,)

Pulling analysis

Overview on objects in step

In [81]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad'
}
In [82]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[82]:
276
In [83]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [84]:
res_df
Out[84]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.353057 2020-07-21 01:13:13.700579 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.353054 2020-07-21 01:13:13.700576 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.353052 2020-07-21 01:13:13.700574 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.353049 2020-07-21 01:13:13.700571 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.353047 2020-07-21 01:13:13.700569 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.353044 2020-07-21 01:13:13.700567 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.353042 2020-07-21 01:13:13.700564 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.353039 2020-07-21 01:13:13.700561 12
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.353037 2020-07-21 01:13:13.700559 12
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.353034 2020-07-21 01:13:13.700556 12
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.353032 2020-07-21 01:13:13.700554 12
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.353029 2020-07-21 01:13:13.700551 12
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.353027 2020-07-21 01:13:13.700549 12
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.353025 2020-07-21 01:13:13.700546 12
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.353022 2020-07-21 01:13:13.700544 12
15 topology_file NaN 2020-07-21 01:13:12.353020 2020-07-21 01:13:13.700541 12
16 pullx_file NaN 2020-07-21 01:13:12.353017 2020-07-21 01:13:13.700539 12
17 pullf_file NaN 2020-07-21 01:13:12.353015 2020-07-21 01:13:13.700536 12
18 data_file NaN 2020-07-21 01:13:12.353012 2020-07-21 01:13:13.700534 12
19 compressed_trajectory_file NaN 2020-07-21 01:13:12.353010 2020-07-21 01:13:13.700531 12
20 trajectory_file NaN 2020-07-21 01:13:12.353007 2020-07-21 01:13:13.700529 12
21 energy_file NaN 2020-07-21 01:13:12.353004 2020-07-21 01:13:13.700526 12
22 log_file NaN 2020-07-21 01:13:12.352999 2020-07-21 01:13:13.700520 12

Global observables

The gmx energy table:

  1  Restraint-Pot.   2  U-B              3  Proper-Dih.      4  LJ-14         
  5  Coulomb-14       6  LJ-(SR)          7  Coulomb-(SR)     8  Coul.-recip.  
  9  Position-Rest.  10  COM-Pull-En.    11  Potential       12  Kinetic-En.   
 13  Total-Energy    14  Temperature     15  Pressure        16  Constr.-rmsd  
 17  Vir-XX          18  Vir-XY          19  Vir-XZ          20  Vir-YX        
 21  Vir-YY          22  Vir-YZ          23  Vir-ZX          24  Vir-ZY        
 25  Vir-ZZ          26  Pres-XX         27  Pres-XY         28  Pres-XZ       
 29  Pres-YX         30  Pres-YY         31  Pres-YZ         32  Pres-ZX       
 33  Pres-ZY         34  Pres-ZZ         35  #Surf*SurfTen   36  T-rest

converted to dict with regex

 \s+([0-9]+)\s+([^\s]+)

and replacement

 '$2': $1,\n
In [85]:
gmx_energy_dict = {
    'Restraint-Pot.': 1,
    'U-B': 2,
    'Proper-Dih.': 3,
    'LJ-14': 4,
    'Coulomb-14': 5,
    'LJ-(SR)': 6,
    'Coulomb-(SR)': 7,
    'Coul.-recip.': 8,
    'Position-Rest.': 9,
    'COM-Pull-En.': 10,
    'Potential': 11,
    'Kinetic-En.': 12,
    'Total-Energy': 13,
    'Temperature': 14,
    'Pressure': 15,
    'Constr.-rmsd': 16,
    'Vir-XX': 17,
    'Vir-XY': 18,
    'Vir-XZ': 19,
    'Vir-YX': 20,
    'Vir-YY': 21,
    'Vir-YZ': 22,
    'Vir-ZX': 23,
    'Vir-ZY': 24,
    'Vir-ZZ': 25,
    'Pres-XX': 26,
    'Pres-XY': 27,
    'Pres-XZ': 28,
    'Pres-YX': 29,
    'Pres-YY': 30,
    'Pres-YZ': 31,
    'Pres-ZX': 32,
    'Pres-ZY': 33,
    'Pres-ZZ': 34,
    '#Surf*SurfTen': 35,
    'T-rest': 36,
}
In [86]:
query = { 
    "metadata.project": project_id,
    "metadata.type":    'energy_file',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[86]:
12
In [87]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [88]:
gmx_energy_selection = [
    'Restraint-Pot.',
    'Position-Rest.',
    'COM-Pull-En.',
    'Potential',
    'Kinetic-En.',
    'Total-Energy',
    'Temperature',
    'Pressure',
    'Constr.-rmsd',
]
In [89]:
res_list = []
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

res_df_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    
    #df = panedr.edr_to_df(tmp.name), fails
    tmpin = tempfile.NamedTemporaryFile(mode='w+b',suffix='.edr', delete=False)
    
    # cur_res_dict = {}
    with tmpin:
        tmpin.write(content)
        #tmpin.seek(0)
       
    res_df = None
    for sel in gmx_energy_selection:  
        try:
            tmpout = tempfile.NamedTemporaryFile(suffix='.xvg', delete=False)
            res = gromacs.energy(f=tmpin.name,o=tmpout.name,
                                 input=str(gmx_energy_dict[sel]))
            #with open(tmpout.name,'r') as f:
            #    xvg = f.read()
            #tmpout.delete()
            xvg = mda.auxiliary.XVG.XVGReader(tmpout.name)
            xvg_time = xvg.read_all_times()
            xvg_data = np.array([ f.data[1:] for f in xvg ]).flatten() # 1st entry contains times
            os.unlink(tmpout.name)
        except: 
            logger.warning("Failed to read '{:s}' from data set {:d}.".format(sel,i))
            failed_list.append((nmolecules, sel))
        else:
            r = {'nmolecules': [nmolecules]*len(xvg_time), 'time': xvg_time, sel: xvg_data}
            cur_df = pd.DataFrame(r)
            if res_df is None:
                res_df = cur_df
            else:
                res_df = pd.merge(res_df, cur_df, how='outer', on=['nmolecules', 'time'])
    res_df_list.append(res_df)
    os.unlink(tmpin.name)
    print('.',end='')
print('')
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules','time'])
............
In [90]:
res_df_mi
Out[90]:
Restraint-Pot. Position-Rest. COM-Pull-En. Potential Kinetic-En. Total-Energy Temperature Pressure Constr.-rmsd
nmolecules time
306 0.00 7031.586914 0.000000 0.016863 -671124.8750 58.650429 -671066.2500 0.523976 3025.780029 0.000002
0.02 3245.086182 0.040132 41.408371 -674064.5625 2971.016113 -671093.5625 26.542702 2949.398438 0.000002
0.04 889.867371 1.412355 268.745911 -675391.7500 4292.286621 -671099.4375 38.346775 2895.226074 0.000002
0.06 389.765137 7.575518 415.137787 -675597.5625 4491.179199 -671106.3750 40.123657 2882.516602 0.000002
0.08 300.085297 19.875889 393.292969 -675509.5000 4396.767578 -671112.7500 39.280193 2876.513672 0.000002
... ... ... ... ... ... ... ... ... ... ...
44 1.92 32.139790 715.891907 115.093407 -587792.1875 1695.226074 -586096.9375 105.396149 2990.895264 0.000002
1.94 37.243038 714.876648 109.428764 -587826.5000 1733.902222 -586092.6250 107.800735 2987.830811 0.000002
1.96 39.406776 720.091187 108.039062 -587847.4375 1759.293701 -586088.1250 109.379379 2990.757324 0.000002
1.98 40.590767 726.882629 107.895409 -587891.2500 1807.384766 -586083.8750 112.369308 2998.117920 0.000002
2.00 40.944344 731.684814 95.964859 -587859.8750 1781.397583 -586078.5000 110.753632 3003.758789 0.000003

1212 rows × 9 columns

In [91]:
cols = 2
y_quantities = [
    'Restraint-Pot.',
    'Position-Rest.',
    'COM-Pull-En.',
    'Potential',
    'Kinetic-En.',
    'Total-Energy',
    'Temperature',
    'Pressure',
    'Constr.-rmsd',
    ]
n = len(y_quantities)
rows = round(n/cols)
positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Pulling forces

In [92]:
res_df_list = []
failed_list = []

query = { 
    "metadata.project": project_id,
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
    "metadata.type": 'pullf_file',
}

fp.filepad.count_documents(query)
match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    
    tmpin = tempfile.NamedTemporaryFile(mode='w+b',suffix='.xvg', delete=False)
    
    with tmpin:
        tmpin.write(content)
        
    try:
        xvg = mda.auxiliary.XVG.XVGReader(tmpin.name)
        xvg_time = xvg.read_all_times()
        xvg_data = np.array([ f.data[1:] for f in xvg ])# .flatten() # 1st entry contains times
    except: 
        logger.warning("Failed to read data set {:d}.".format(i))
        failed_list.append(nmolecules)
    else:
        res_df_list.append(pd.DataFrame({
            'nmolecules': np.array([nmolecules]*len(xvg_time), dtype=int),
            'time': xvg_time, 
            **{i: xvg_data[:,i] for i in range(nmolecules)}
        }))
    os.unlink(tmpin.name)
    print('.',end='')
print('')
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules','time'])
............
In [93]:
# pulling forces
n = len(res_df['nmolecules'].unique())
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
for pos, (key, grp) in zip(positions,res_df.groupby(['nmolecules'])):
    columns = list(set(grp.columns) - set(['nmolecules','time']))
    grp.plot('time', columns, ax=ax[pos],title=key,legend=None)
fig.tight_layout()
In [94]:
# mean pulling force
fig, ax = plt.subplots(1,1,figsize=(5,4))
for key, grp in res_df.groupby(['nmolecules']):
    columns = list(set(grp.columns) - set(['nmolecules','time']))
    grp = grp.set_index('time')
    grp = grp.drop(columns='nmolecules')
    grp.mean(axis=1).plot(legend=True, label=key, ax=ax)
fig.tight_layout()
#fig.legend()

Pulling groups movement

In [95]:
res_df_list = []
failed_list = []

query = { 
    "metadata.project": project_id,
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
    "metadata.type":    'pullx_file',
}

fp.filepad.count_documents(query)
match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)


for i, c in enumerate(cursor): 
    print(c["_id"])
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])  # int(metadata["metadata"]["nmolecules"])
    
    tmpin = tempfile.NamedTemporaryFile(mode='w+b',suffix='.xvg', delete=False)
    
    with tmpin:
        tmpin.write(content)
        
    try:
        xvg = gromacs.fileformats.XVG(tmpin.name)
        xvg_time = xvg.array[0,:]
        
        #xvg_labels = ['1', '1 ref', '1 dX', '1 dY', '1 dZ', '1 g 1 X', '1 g 1 Y', '1 g 1 Z', '1 g 2 X', '1 g 2 Y', '1 g 2 Z']
        N_pull_coords = nmolecules
        N_cols = len(xvg.names)
        N_cols_per_coord = int(N_cols / N_pull_coords)
        
        xvg_labels = xvg.names[:N_cols_per_coord]
        xvg_data = {}
        for j in range(N_pull_coords):
            for k in range(N_cols_per_coord):
                xvg_data[(j,xvg_labels[k])] = xvg.array[
                    1+j*N_cols_per_coord+k,:]
        
    except: 
        logger.exception("Failed to read data set {:d}.".format(i))
        failed_list.append(nmolecules)

    else:
        res_df_list.append(pd.DataFrame({
            'nmolecules': np.array([nmolecules]*len(xvg_time), dtype=int),
            'time': xvg_time, 
            **xvg_data # {i: xvg_data[:,i] for i in range(nmolecules)}
        }))
    os.unlink(tmpin.name)
res_df = pd.concat(res_df_list)
res_df_mi = res_df.set_index(['nmolecules','time'])
res_df_mi.columns = pd.MultiIndex.from_tuples(res_df_mi.columns, names=['nmolecule', 'coord'])
{'nmolecules': 525}
{'nmolecules': 394}
{'nmolecules': 438}
{'nmolecules': 350}
{'nmolecules': 263}
{'nmolecules': 175}
{'nmolecules': 131}
{'nmolecules': 44}
{'nmolecules': 306}
{'nmolecules': 88}
{'nmolecules': 481}
{'nmolecules': 219}
In [96]:
res_df_mi
Out[96]:
nmolecule 0 ... 524
coord 1 1 ref 1 dX 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z ... 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z 1 g 2 X 1 g 2 Y 1 g 2 Z
nmolecules time
525 0.0 3.04277 3.04264 -3.012120 -0.408986 0.135390 6.778 6.802 6.763 ... -0.989800 2.01098 6.778 6.802 6.763 4.71159 5.81220 8.77398
0.2 3.02987 3.02264 -2.997960 -0.408835 0.158761 6.778 6.802 6.763 ... -0.953240 2.00705 6.778 6.802 6.763 4.72044 5.84876 8.77005
0.4 3.01117 3.00264 -2.974230 -0.430042 0.190234 6.778 6.802 6.763 ... -0.975928 1.99767 6.778 6.802 6.763 4.74648 5.82607 8.76067
0.6 2.97040 2.98264 -2.931880 -0.424803 0.216552 6.778 6.802 6.763 ... -0.958169 1.97888 6.778 6.802 6.763 4.76680 5.84383 8.74188
0.8 2.95784 2.96264 -2.927250 -0.351821 0.237092 6.778 6.802 6.763 ... -0.957422 1.97881 6.778 6.802 6.763 4.79279 5.84458 8.74181
1.0 2.93029 2.94264 -2.903060 -0.248654 0.311457 6.778 6.802 6.763 ... -0.939117 1.96459 6.778 6.802 6.763 4.80662 5.86288 8.72759
1.2 2.91763 2.92264 -2.895960 -0.235826 0.265267 6.778 6.802 6.763 ... -0.951476 1.94226 6.778 6.802 6.763 4.82482 5.85052 8.70526
1.4 2.91436 2.90264 -2.892570 -0.184162 0.304320 6.778 6.802 6.763 ... -0.934430 1.96228 6.778 6.802 6.763 4.84116 5.86757 8.72528
1.6 2.89784 2.88264 -2.880380 -0.148715 0.280632 6.778 6.802 6.763 ... -0.899018 1.96046 6.778 6.802 6.763 4.84962 5.90298 8.72346
1.8 2.86755 2.86264 -2.849370 -0.157306 0.281413 6.778 6.802 6.763 ... -0.868220 1.98504 6.778 6.802 6.763 4.89189 5.93378 8.74804
2.0 2.85090 2.84264 -2.839810 -0.153754 0.198643 6.778 6.802 6.763 ... -0.848802 1.98689 6.778 6.802 6.763 4.91739 5.95320 8.74989
394 0.0 3.05858 3.05857 1.789350 2.047640 1.400090 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.04088 3.03857 1.779700 2.034340 1.393240 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 3.02171 3.01857 1.768700 2.027730 1.375040 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 3.00259 2.99857 1.740630 2.033640 1.360160 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.97199 2.97857 1.720970 2.024300 1.331620 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.94701 2.95857 1.692370 2.013770 1.328710 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.93167 2.93857 1.673210 2.016410 1.314980 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.91156 2.91857 1.653800 1.972770 1.360270 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.91143 2.89857 1.675850 1.939350 1.380890 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.89987 2.87857 1.662630 1.938180 1.374190 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.85929 2.85857 1.657670 1.902710 1.344380 6.749 6.708 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
438 0.0 3.06981 3.06970 -1.695070 -0.217678 2.550120 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.05348 3.04970 -1.693840 -0.206201 2.532210 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 3.03029 3.02970 -1.679390 -0.212995 2.513350 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 3.01049 3.00970 -1.651080 -0.232905 2.506540 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.98448 2.98970 -1.625860 -0.243347 2.490880 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.97258 2.96970 -1.629800 -0.202889 2.477660 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.95605 2.94970 -1.645190 -0.176932 2.449540 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.93565 2.92970 -1.609450 -0.180835 2.448470 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.92409 2.90970 -1.584400 -0.207038 2.448900 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.88338 2.88970 -1.591590 -0.226784 2.393590 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.88183 2.86970 -1.582800 -0.263782 2.393770 6.789 6.821 6.695 ... NaN NaN NaN NaN NaN NaN NaN NaN
350 0.0 2.91510 2.91528 1.507870 -1.258800 -2.153960 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.90488 2.89528 1.529000 -1.219100 -2.148080 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.84159 2.87528 1.530890 -1.155010 -2.096890 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.81071 2.85528 1.512660 -1.133550 -2.080150 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.80978 2.83528 1.507760 -1.118860 -2.090380 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.78623 2.81528 1.499580 -1.095290 -2.077190 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.81164 2.79528 1.508110 -1.089030 -2.108300 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.79608 2.77528 1.510560 -1.114200 -2.072390 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.77875 2.75528 1.514690 -1.105710 -2.050500 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.74687 2.73528 1.542850 -1.104440 -1.986230 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.74250 2.71528 1.554230 -1.109970 -1.968160 6.720 6.803 6.789 ... NaN NaN NaN NaN NaN NaN NaN NaN
263 0.0 3.09102 3.09096 1.908980 -1.902280 1.513770 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.08303 3.07096 1.897440 -1.899630 1.515330 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 3.06393 3.05096 1.884530 -1.895090 1.498270 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 3.03792 3.03096 1.861010 -1.898400 1.470270 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 3.00997 3.01096 1.835840 -1.871290 1.479160 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 3.01847 2.99096 1.830880 -1.825480 1.557770 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.99438 2.97096 1.814530 -1.797550 1.562890 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.94768 2.95096 1.776810 -1.786610 1.529630 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.93524 2.93096 1.777020 -1.753860 1.543310 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.93640 2.91096 1.773920 -1.746710 1.557130 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.89080 2.89096 1.741280 -1.774530 1.475040 6.719 6.928 6.938 ... NaN NaN NaN NaN NaN NaN NaN NaN
175 0.0 2.98740 2.98736 -1.122020 2.768040 -0.059902 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.97408 2.96736 -1.123540 2.753070 -0.058330 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.95666 2.94736 -1.117920 2.736390 -0.065259 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.94137 2.92736 -1.130220 2.714980 -0.056259 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.91696 2.90736 -1.142280 2.683790 -0.033462 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.89288 2.88736 -1.123830 2.665650 -0.008771 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.87267 2.86736 -1.129610 2.641120 0.026109 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.84310 2.84736 -1.133510 2.606840 0.052546 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.80902 2.82736 -1.112690 2.578840 0.045894 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.79277 2.80736 -1.078440 2.574780 0.083585 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.78704 2.78736 -1.093620 2.562340 0.077222 6.715 6.790 6.825 ... NaN NaN NaN NaN NaN NaN NaN NaN
131 0.0 3.09045 3.09024 -0.850902 -1.544340 2.538080 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.07859 3.07024 -0.839331 -1.542740 2.528470 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 3.06101 3.05024 -0.847569 -1.536360 2.508190 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 3.03237 3.03024 -0.851869 -1.515500 2.484520 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 3.00622 3.01024 -0.876325 -1.489750 2.459690 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.99657 2.99024 -0.872745 -1.444920 2.475870 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.98199 2.97024 -0.872804 -1.391780 2.488660 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.95146 2.95024 -0.910132 -1.348670 2.462490 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.93310 2.93024 -0.985504 -1.304640 2.435110 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.89833 2.91024 -1.065910 -1.269290 2.377610 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.90315 2.89024 -1.210220 -1.294830 2.299370 6.785 6.624 6.675 ... NaN NaN NaN NaN NaN NaN NaN NaN
44 0.0 2.87939 2.87932 2.060960 1.771370 -0.951617 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.86783 2.85932 2.040840 1.776020 -0.951409 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.85606 2.83932 2.051480 1.746500 -0.947749 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.85435 2.81932 2.087230 1.707210 -0.936068 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.83880 2.79932 2.085300 1.679200 -0.943723 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.82847 2.77932 2.072130 1.663470 -0.969229 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.80850 2.75932 2.035380 1.663100 -0.989450 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.78703 2.73932 1.993030 1.647290 -1.040100 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.78783 2.71932 1.982900 1.656520 -1.046910 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.76353 2.69932 1.962760 1.611780 -1.089430 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.73019 2.67932 1.842040 1.650360 -1.156340 6.660 6.627 6.730 ... NaN NaN NaN NaN NaN NaN NaN NaN
306 0.0 2.99688 2.99656 2.234560 -1.989880 0.168659 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.97925 2.97656 2.211300 -1.988990 0.173106 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.95516 2.95656 2.200380 -1.964280 0.181320 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.92743 2.93656 2.182050 -1.938710 0.223479 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.89162 2.91656 2.168720 -1.900190 0.217636 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.86808 2.89656 2.178040 -1.854240 0.209316 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.87530 2.87656 2.253140 -1.777560 0.176006 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.86280 2.85656 2.276050 -1.725690 0.192927 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.83452 2.83656 2.271610 -1.677400 0.246300 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.82773 2.81656 2.250710 -1.695120 0.238560 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.81095 2.79656 2.243790 -1.667140 0.295791 6.785 6.759 6.790 ... NaN NaN NaN NaN NaN NaN NaN NaN
88 0.0 2.96425 2.96426 2.212950 0.236086 -1.958030 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 2.92867 2.94426 2.223590 0.251374 -1.889320 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.92826 2.92426 2.242730 0.263759 -1.864220 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.91728 2.90426 2.234980 0.290677 -1.852260 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.88076 2.88426 2.200240 0.262680 -1.840850 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.83341 2.86426 2.139580 0.235171 -1.842590 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.81403 2.84426 2.115710 0.215252 -1.842880 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.80890 2.82426 2.089990 0.163857 -1.869500 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.79677 2.80426 2.004410 0.190867 -1.941090 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.77417 2.78426 1.916760 0.124871 -2.001610 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.75556 2.76426 1.838440 0.051715 -2.051960 6.624 6.903 6.676 ... NaN NaN NaN NaN NaN NaN NaN NaN
481 0.0 3.02906 3.02901 2.669190 0.972790 -1.050880 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.01282 3.00901 2.652500 0.961100 -1.057180 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 2.99702 2.98901 2.629840 0.947392 -1.080960 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 2.97670 2.96901 2.613230 0.903125 -1.102790 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 2.94319 2.94901 2.609950 0.822523 -1.083490 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.93229 2.92901 2.626970 0.805054 -1.024320 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.92167 2.90901 2.641930 0.794171 -0.962114 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.89589 2.88901 2.627770 0.756169 -0.953522 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.88875 2.86901 2.606340 0.751238 -0.993734 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.83390 2.84901 2.567070 0.659165 -1.003290 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.83436 2.82901 2.553330 0.675940 -1.028210 6.799 6.864 6.719 ... NaN NaN NaN NaN NaN NaN NaN NaN
219 0.0 3.08224 3.08202 -1.097430 -2.056400 2.016690 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.2 3.07527 3.06202 -1.113480 -2.045040 2.008790 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.4 3.05837 3.04202 -1.153960 -2.016810 1.988590 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.6 3.02829 3.02202 -1.125190 -2.005830 1.970060 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
0.8 3.00249 3.00202 -1.052950 -2.017270 1.958800 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.0 2.98880 2.98202 -1.036230 -2.018110 1.945870 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.2 2.97650 2.96202 -1.045430 -2.050940 1.886870 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.4 2.94793 2.94202 -1.083810 -2.055260 1.814260 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.6 2.95220 2.92202 -1.108670 -2.032210 1.832060 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
1.8 2.91947 2.90202 -1.083090 -2.029880 1.797170 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN
2.0 2.88906 2.88202 -1.045460 -2.001590 1.802030 6.783 6.773 6.687 ... NaN NaN NaN NaN NaN NaN NaN NaN

132 rows × 5775 columns

In [97]:
res_df = res_df_mi.groupby(axis=1,level='coord').mean().reset_index()
In [98]:
res_df
Out[98]:
coord nmolecules time 1 1 dX 1 dY 1 dZ 1 g 1 X 1 g 1 Y 1 g 1 Z 1 g 2 X 1 g 2 Y 1 g 2 Z 1 ref
0 525 0.0 3.034922 0.010765 -0.024874 0.038641 6.778 6.802 6.763 6.788769 6.777123 6.801643 3.034867
1 525 0.2 3.018362 0.009615 -0.024132 0.039875 6.778 6.802 6.763 6.787619 6.777865 6.802876 3.014867
2 525 0.4 3.000280 0.009741 -0.021807 0.040659 6.778 6.802 6.763 6.787745 6.780190 6.803660 2.994867
3 525 0.6 2.975453 0.010267 -0.019831 0.041684 6.778 6.802 6.763 6.788271 6.782166 6.804685 2.974867
4 525 0.8 2.950287 0.008752 -0.017311 0.042727 6.778 6.802 6.763 6.786756 6.784685 6.805728 2.954867
5 525 1.0 2.931836 0.008292 -0.014080 0.044248 6.778 6.802 6.763 6.786296 6.787917 6.807249 2.934867
6 525 1.2 2.915855 0.006768 -0.010449 0.046073 6.778 6.802 6.763 6.784772 6.791548 6.809074 2.914867
7 525 1.4 2.900949 0.007208 -0.006410 0.047046 6.778 6.802 6.763 6.785213 6.795586 6.810047 2.894867
8 525 1.6 2.882969 0.007942 -0.004268 0.048136 6.778 6.802 6.763 6.785947 6.797730 6.811138 2.874867
9 525 1.8 2.863472 0.007217 -0.003854 0.050202 6.778 6.802 6.763 6.785222 6.798142 6.813203 2.854867
10 525 2.0 2.842942 0.006279 -0.004383 0.050293 6.778 6.802 6.763 6.784284 6.797614 6.813294 2.834867
11 394 0.0 3.039270 0.042538 0.071386 0.102983 6.749 6.708 6.695 6.791542 6.779382 6.797985 3.039219
12 394 0.2 3.021789 0.041296 0.071312 0.100680 6.749 6.708 6.695 6.790301 6.779309 6.795681 3.019219
13 394 0.4 3.005356 0.039927 0.070586 0.098463 6.749 6.708 6.695 6.788931 6.778582 6.793464 2.999219
14 394 0.6 2.981034 0.037926 0.066785 0.099624 6.749 6.708 6.695 6.786931 6.774782 6.794624 2.979219
15 394 0.8 2.955050 0.035288 0.063702 0.097570 6.749 6.708 6.695 6.784293 6.771699 6.792571 2.959219
16 394 1.0 2.934622 0.031641 0.064001 0.099887 6.749 6.708 6.695 6.780645 6.771998 6.794888 2.939219
17 394 1.2 2.917793 0.030269 0.064800 0.101330 6.749 6.708 6.695 6.779274 6.772797 6.796331 2.919219
18 394 1.4 2.902097 0.029058 0.061974 0.103267 6.749 6.708 6.695 6.778063 6.769971 6.798268 2.899219
19 394 1.6 2.884613 0.027490 0.059746 0.105912 6.749 6.708 6.695 6.776495 6.767743 6.800913 2.879219
20 394 1.8 2.865220 0.026572 0.058102 0.103044 6.749 6.708 6.695 6.775577 6.766099 6.798045 2.859219
21 394 2.0 2.845958 0.027230 0.059402 0.102275 6.749 6.708 6.695 6.776234 6.767399 6.797276 2.839219
22 438 0.0 3.034992 -0.005472 -0.046267 0.093900 6.789 6.821 6.695 6.783532 6.774730 6.788901 3.034932
23 438 0.2 3.018102 -0.005551 -0.046558 0.094535 6.789 6.821 6.695 6.783453 6.774439 6.789535 3.014932
24 438 0.4 3.000249 -0.005531 -0.045098 0.095311 6.789 6.821 6.695 6.783474 6.775899 6.790313 2.994932
25 438 0.6 2.975846 -0.004704 -0.042169 0.094428 6.789 6.821 6.695 6.784301 6.778829 6.789429 2.974932
26 438 0.8 2.951186 -0.005260 -0.039267 0.093976 6.789 6.821 6.695 6.783744 6.781730 6.788978 2.954932
27 438 1.0 2.931786 -0.005480 -0.035514 0.095374 6.789 6.821 6.695 6.783524 6.785483 6.790375 2.934932
28 438 1.2 2.915978 -0.002793 -0.034526 0.095118 6.789 6.821 6.695 6.786212 6.786471 6.790119 2.914932
29 438 1.4 2.899683 -0.001023 -0.032910 0.097669 6.789 6.821 6.695 6.787981 6.788087 6.792670 2.894932
30 438 1.6 2.881104 0.000461 -0.031330 0.098089 6.789 6.821 6.695 6.789465 6.789667 6.793090 2.874932
31 438 1.8 2.861946 -0.001186 -0.031247 0.098028 6.789 6.821 6.695 6.787818 6.789750 6.793029 2.854932
32 438 2.0 2.841929 -0.001993 -0.033535 0.100370 6.789 6.821 6.695 6.787011 6.787462 6.795371 2.834932
33 350 0.0 3.036760 0.080290 -0.023688 0.004388 6.720 6.803 6.789 6.800295 6.779309 6.793390 3.036711
34 350 0.2 3.018600 0.081297 -0.021077 0.003408 6.720 6.803 6.789 6.801301 6.781920 6.792409 3.016711
35 350 0.4 3.002237 0.082215 -0.019325 0.002584 6.720 6.803 6.789 6.802220 6.783672 6.791586 2.996711
36 350 0.6 2.977871 0.081499 -0.020808 0.001862 6.720 6.803 6.789 6.801503 6.782189 6.790863 2.976711
37 350 0.8 2.952839 0.081155 -0.021246 0.003388 6.720 6.803 6.789 6.801159 6.781751 6.792389 2.956711
38 350 1.0 2.931092 0.079251 -0.019686 0.007140 6.720 6.803 6.789 6.799255 6.783311 6.796141 2.936711
39 350 1.2 2.913704 0.079847 -0.018476 0.008928 6.720 6.803 6.789 6.799852 6.784521 6.797930 2.916711
40 350 1.4 2.898667 0.081984 -0.018416 0.012155 6.720 6.803 6.789 6.801989 6.784581 6.801156 2.896711
41 350 1.6 2.880808 0.084175 -0.016798 0.016169 6.720 6.803 6.789 6.804179 6.786199 6.805170 2.876711
42 350 1.8 2.861538 0.084581 -0.015834 0.019831 6.720 6.803 6.789 6.804585 6.787163 6.808832 2.856711
43 350 2.0 2.841458 0.081435 -0.017207 0.023532 6.720 6.803 6.789 6.801439 6.785790 6.812533 2.836711
44 263 0.0 3.037367 0.090980 -0.167417 -0.159162 6.719 6.928 6.938 6.809985 6.760580 6.778839 3.037313
45 263 0.2 3.019191 0.090540 -0.165881 -0.158784 6.719 6.928 6.938 6.809544 6.762115 6.779217 3.017313
46 263 0.4 3.003904 0.088006 -0.163625 -0.158928 6.719 6.928 6.938 6.807010 6.764372 6.779073 2.997313
47 263 0.6 2.979121 0.085891 -0.161594 -0.158608 6.719 6.928 6.938 6.804895 6.766403 6.779393 2.977313
48 263 0.8 2.953302 0.085188 -0.160774 -0.155233 6.719 6.928 6.938 6.804192 6.767223 6.782768 2.957313
49 263 1.0 2.931889 0.085824 -0.159726 -0.152970 6.719 6.928 6.938 6.804828 6.768271 6.785031 2.937313
50 263 1.2 2.914128 0.082727 -0.161103 -0.152828 6.719 6.928 6.938 6.801732 6.766893 6.785174 2.917313
51 263 1.4 2.897885 0.084754 -0.159780 -0.148310 6.719 6.928 6.938 6.803759 6.768217 6.789691 2.897313
52 263 1.6 2.881493 0.084530 -0.160834 -0.141476 6.719 6.928 6.938 6.803534 6.767163 6.796525 2.877313
53 263 1.8 2.861136 0.082015 -0.161506 -0.138388 6.719 6.928 6.938 6.801020 6.766491 6.799612 2.857313
54 263 2.0 2.842035 0.077943 -0.163131 -0.130951 6.719 6.928 6.938 6.796948 6.764866 6.807050 2.837313
55 175 0.0 3.030009 0.080984 -0.018843 -0.056047 6.715 6.790 6.825 6.795988 6.771153 6.768955 3.029957
56 175 0.2 3.012094 0.078401 -0.018740 -0.058484 6.715 6.790 6.825 6.793405 6.771258 6.766517 3.009957
57 175 0.4 2.997073 0.076897 -0.015878 -0.059774 6.715 6.790 6.825 6.791901 6.774119 6.765227 2.989957
58 175 0.6 2.972110 0.075941 -0.011430 -0.056586 6.715 6.790 6.825 6.790945 6.778567 6.768416 2.969957
59 175 0.8 2.946350 0.076670 -0.002939 -0.054902 6.715 6.790 6.825 6.791674 6.787057 6.770098 2.949957
60 175 1.0 2.924743 0.078043 0.005130 -0.055743 6.715 6.790 6.825 6.793047 6.795127 6.769258 2.929957
61 175 1.2 2.907656 0.079557 0.009705 -0.057966 6.715 6.790 6.825 6.794562 6.799702 6.767035 2.909957
62 175 1.4 2.889336 0.080505 0.011842 -0.056512 6.715 6.790 6.825 6.795509 6.801839 6.768489 2.889957
63 175 1.6 2.872079 0.080764 0.012456 -0.053796 6.715 6.790 6.825 6.795769 6.802453 6.771206 2.869957
64 175 1.8 2.853288 0.080410 0.016686 -0.051414 6.715 6.790 6.825 6.795415 6.806683 6.773587 2.849957
65 175 2.0 2.834097 0.079160 0.022666 -0.046314 6.715 6.790 6.825 6.794164 6.812662 6.778688 2.829957
66 131 0.0 3.037290 0.008238 0.222722 0.160375 6.785 6.624 6.675 6.793242 6.846719 6.835376 3.037226
67 131 0.2 3.017884 0.011374 0.218104 0.160173 6.785 6.624 6.675 6.796378 6.842101 6.835175 3.017226
68 131 0.4 3.004250 0.011501 0.219179 0.157313 6.785 6.624 6.675 6.796505 6.843176 6.832314 2.997226
69 131 0.6 2.980649 0.012166 0.217925 0.148020 6.785 6.624 6.675 6.797171 6.841922 6.823021 2.977226
70 131 0.8 2.956502 0.014619 0.211890 0.142922 6.785 6.624 6.675 6.799624 6.835887 6.817923 2.957226
71 131 1.0 2.934341 0.011296 0.210212 0.144027 6.785 6.624 6.675 6.796300 6.834209 6.819028 2.937226
72 131 1.2 2.916920 0.006992 0.209758 0.150397 6.785 6.624 6.675 6.791997 6.833754 6.825398 2.917226
73 131 1.4 2.898377 -0.001679 0.203074 0.153058 6.785 6.624 6.675 6.783325 6.827071 6.828059 2.897226
74 131 1.6 2.875767 -0.011129 0.199670 0.158375 6.785 6.624 6.675 6.773875 6.823667 6.833376 2.877226
75 131 1.8 2.858852 -0.014816 0.194977 0.165034 6.785 6.624 6.675 6.770189 6.818974 6.840034 2.857226
76 131 2.0 2.842520 -0.017206 0.186697 0.171387 6.785 6.624 6.675 6.767799 6.810694 6.846389 2.837226
77 44 0.0 3.020985 0.235488 0.313048 0.051773 6.660 6.627 6.730 6.895492 6.940044 6.781774 3.020933
78 44 0.2 3.001215 0.234083 0.311769 0.049222 6.660 6.627 6.730 6.894088 6.938766 6.779223 3.000932
79 44 0.4 2.986466 0.236713 0.311665 0.049282 6.660 6.627 6.730 6.896717 6.938662 6.779282 2.980932
80 44 0.6 2.965443 0.241006 0.310308 0.051667 6.660 6.627 6.730 6.901011 6.937304 6.781668 2.960932
81 44 0.8 2.943091 0.244133 0.312242 0.055588 6.660 6.627 6.730 6.904137 6.939239 6.785590 2.940933
82 44 1.0 2.919757 0.240311 0.313154 0.058092 6.660 6.627 6.730 6.900316 6.940152 6.788093 2.920932
83 44 1.2 2.897124 0.229952 0.315481 0.056023 6.660 6.627 6.730 6.889956 6.942479 6.786024 2.900933
84 44 1.4 2.879173 0.219895 0.314174 0.053120 6.660 6.627 6.730 6.879899 6.941172 6.783121 2.880933
85 44 1.6 2.864009 0.218257 0.311913 0.055171 6.660 6.627 6.730 6.878262 6.938910 6.785172 2.860933
86 44 1.8 2.845171 0.221283 0.306526 0.052747 6.660 6.627 6.730 6.881287 6.933523 6.782748 2.840933
87 44 2.0 2.825302 0.215318 0.307488 0.041060 6.660 6.627 6.730 6.875323 6.934485 6.771061 2.820933
88 306 0.0 3.030811 -0.003623 0.015265 0.000827 6.785 6.759 6.790 6.781381 6.774262 6.790828 3.030759
89 306 0.2 3.011870 -0.003788 0.013854 -0.000054 6.785 6.759 6.790 6.781216 6.772852 6.789947 3.010759
90 306 0.4 2.997445 -0.001588 0.014145 -0.001017 6.785 6.759 6.790 6.783416 6.773142 6.788983 2.990759
91 306 0.6 2.971440 0.000782 0.015051 -0.002138 6.785 6.759 6.790 6.785786 6.774048 6.787864 2.970759
92 306 0.8 2.946581 0.005614 0.016238 -0.003716 6.785 6.759 6.790 6.790618 6.775235 6.786285 2.950759
93 306 1.0 2.927147 0.007186 0.019300 -0.005143 6.785 6.759 6.790 6.792189 6.778297 6.784858 2.930759
94 306 1.2 2.909072 0.009374 0.018696 -0.006815 6.785 6.759 6.790 6.794379 6.777693 6.783186 2.910759
95 306 1.4 2.891492 0.009350 0.016756 -0.010996 6.785 6.759 6.790 6.794355 6.775753 6.779005 2.890759
96 306 1.6 2.873910 0.013068 0.014708 -0.013598 6.785 6.759 6.790 6.798072 6.773705 6.776403 2.870759
97 306 1.8 2.855294 0.014694 0.012045 -0.013692 6.785 6.759 6.790 6.799698 6.771042 6.776309 2.850759
98 306 2.0 2.836348 0.011107 0.012934 -0.014228 6.785 6.759 6.790 6.796111 6.771932 6.775773 2.830759
99 88 0.0 3.036086 0.236927 -0.256623 0.157635 6.624 6.903 6.676 6.860931 6.646373 6.833636 3.036024
100 88 0.2 3.016645 0.236229 -0.255601 0.160637 6.624 6.903 6.676 6.860234 6.647396 6.836638 3.016024
101 88 0.4 3.002765 0.232964 -0.252690 0.160751 6.624 6.903 6.676 6.856968 6.650306 6.836752 2.996024
102 88 0.6 2.979860 0.224186 -0.253779 0.152404 6.624 6.903 6.676 6.848190 6.649218 6.828404 2.976024
103 88 0.8 2.952067 0.217245 -0.251586 0.138371 6.624 6.903 6.676 6.841249 6.651411 6.814372 2.956024
104 88 1.0 2.931130 0.212298 -0.246777 0.130433 6.624 6.903 6.676 6.836303 6.656220 6.806434 2.936024
105 88 1.2 2.911752 0.205931 -0.241666 0.122064 6.624 6.903 6.676 6.829936 6.661331 6.798065 2.916024
106 88 1.4 2.893783 0.200155 -0.243981 0.111793 6.624 6.903 6.676 6.824159 6.659016 6.787794 2.896024
107 88 1.6 2.875490 0.198388 -0.244446 0.103339 6.624 6.903 6.676 6.822393 6.658551 6.779341 2.876024
108 88 1.8 2.856435 0.196086 -0.247459 0.090633 6.624 6.903 6.676 6.820091 6.655538 6.766634 2.856024
109 88 2.0 2.837190 0.192131 -0.250788 0.086682 6.624 6.903 6.676 6.816135 6.652208 6.762682 2.836024
110 481 0.0 3.034603 -0.014782 -0.072324 0.067737 6.799 6.864 6.719 6.784223 6.791673 6.786737 3.034543
111 481 0.2 3.018015 -0.014357 -0.072601 0.067114 6.799 6.864 6.719 6.784647 6.791395 6.786114 3.014543
112 481 0.4 3.000231 -0.014021 -0.072274 0.067635 6.799 6.864 6.719 6.784983 6.791722 6.786636 2.994543
113 481 0.6 2.974872 -0.015174 -0.070603 0.068136 6.799 6.864 6.719 6.783830 6.793394 6.787137 2.974543
114 481 0.8 2.950800 -0.015300 -0.069087 0.070114 6.799 6.864 6.719 6.783704 6.794910 6.789115 2.954543
115 481 1.0 2.931433 -0.014866 -0.067311 0.071437 6.799 6.864 6.719 6.784139 6.796686 6.790438 2.934543
116 481 1.2 2.915312 -0.016355 -0.063953 0.071661 6.799 6.864 6.719 6.782649 6.800044 6.790662 2.914543
117 481 1.4 2.898523 -0.017177 -0.062616 0.071685 6.799 6.864 6.719 6.781828 6.801380 6.790687 2.894543
118 481 1.6 2.880843 -0.019072 -0.062704 0.071277 6.799 6.864 6.719 6.779932 6.801293 6.790278 2.874543
119 481 1.8 2.862234 -0.019518 -0.062286 0.071343 6.799 6.864 6.719 6.779486 6.801711 6.790344 2.854543
120 481 2.0 2.842636 -0.018782 -0.062664 0.070667 6.799 6.864 6.719 6.780222 6.801333 6.789668 2.834543
121 219 0.0 3.035669 -0.001536 0.017633 0.134667 6.783 6.773 6.687 6.781467 6.790630 6.821668 3.035626
122 219 0.2 3.016866 -0.002555 0.017841 0.134384 6.783 6.773 6.687 6.780449 6.790838 6.821385 3.015626
123 219 0.4 3.003203 -0.003735 0.018608 0.134001 6.783 6.773 6.687 6.779269 6.791605 6.821002 2.995626
124 219 0.6 2.978078 -0.001805 0.016775 0.129465 6.783 6.773 6.687 6.781199 6.789773 6.816466 2.975626
125 219 0.8 2.951516 -0.001210 0.017160 0.121625 6.783 6.773 6.687 6.781794 6.790156 6.808625 2.955626
126 219 1.0 2.928783 -0.001437 0.016281 0.115233 6.783 6.773 6.687 6.781567 6.789278 6.802233 2.935626
127 219 1.2 2.912858 -0.004232 0.018190 0.111485 6.783 6.773 6.687 6.778772 6.791187 6.798486 2.915626
128 219 1.4 2.896929 -0.005499 0.018065 0.106649 6.783 6.773 6.687 6.777505 6.791062 6.793650 2.895626
129 219 1.6 2.880101 -0.002200 0.019840 0.099829 6.783 6.773 6.687 6.780804 6.792837 6.786830 2.875626
130 219 1.8 2.861178 0.000377 0.018248 0.094078 6.783 6.773 6.687 6.783382 6.791244 6.781079 2.855626
131 219 2.0 2.840425 0.006052 0.014156 0.090879 6.783 6.773 6.687 6.789057 6.787153 6.777880 2.835626
In [99]:
cols = 2
y_quantities = [
    '1', 
    '1 ref', 
    '1 dX', 
    '1 dY', 
    '1 dZ', 
    '1 g 1 X', 
    '1 g 1 Y', 
    '1 g 1 Z', 
    '1 g 2 X', 
    '1 g 2 Y', 
    '1 g 2 Z'
]
n = len(y_quantities)
rows = round(n/cols)
positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [100]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsPull:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [101]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[101]:
12
In [102]:
# check files degenerate by 'metadata.type' ad 'metadata.name'

aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
                'step': '$metadata.step',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['step', 'type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [103]:
res_df
Out[103]:
step type name earliest latest object_count
0 GromacsPull:ProcessAnalyzeAndVisualize:push_filepad mp4_file NaN 2020-07-21 01:13:12.353057 2020-07-21 01:13:13.700579 12
In [104]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["gfs_id"])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        obj_list.append(Video.from_file(tmp.name))
    print('.',end='')
............
In [105]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
(525,)
(481,)
(438,)
(394,)
(350,)
(306,)
(263,)
(219,)
(175,)
(131,)
(88,)
(44,)

Pre-evaluated RDF

Overview

In [106]:
query = { 
    "metadata.project": project_id,
    "metadata.type": {'$regex': '.*rdf$'},
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[106]:
120
In [107]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [108]:
res_df
Out[108]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-21 01:13:12.353044 2020-07-21 01:13:13.700567 12
1 surfactant_head_surfactant_tail_rdf 2020-07-21 01:13:12.353042 2020-07-21 01:13:13.700564 12
2 surfactant_head_surfactant_head_rdf 2020-07-21 01:13:12.353039 2020-07-21 01:13:13.700561 12
3 substrate_surfactant_tail_rdf 2020-07-21 01:13:12.353037 2020-07-21 01:13:13.700559 12
4 substrate_surfactant_head_rdf 2020-07-21 01:13:12.353034 2020-07-21 01:13:13.700556 12
5 substrate_substrate_rdf 2020-07-21 01:13:12.353032 2020-07-21 01:13:13.700554 12
6 counterion_surfactant_tail_rdf 2020-07-21 01:13:12.353029 2020-07-21 01:13:13.700551 12
7 counterion_surfactant_head_rdf 2020-07-21 01:13:12.353027 2020-07-21 01:13:13.700549 12
8 counterion_substrate_rdf 2020-07-21 01:13:12.353025 2020-07-21 01:13:13.700546 12
9 counterion_counterion_rdf 2020-07-21 01:13:12.353022 2020-07-21 01:13:13.700544 12

Substrate - surfactant head RDF

In [109]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [110]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[110]:
12
In [111]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [112]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - surfactant tail RDF

In [113]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [114]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[114]:
12
In [115]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [116]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
    
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - surfactant tail RDF

In [117]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [118]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsPull:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[118]:
12
In [119]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [120]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Energy minimization after solvation analysis

Overview on objects in step

In [121]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad'
}
In [122]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[122]:
240
In [123]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [124]:
res_df
Out[124]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.365566 2020-07-21 01:13:13.714240 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.365564 2020-07-21 01:13:13.714238 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.365561 2020-07-21 01:13:13.714235 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.365558 2020-07-21 01:13:13.714233 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.365556 2020-07-21 01:13:13.714230 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.365554 2020-07-21 01:13:13.714228 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.365551 2020-07-21 01:13:13.714225 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.365549 2020-07-21 01:13:13.714223 12
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.365546 2020-07-21 01:13:13.714220 12
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.365544 2020-07-21 01:13:13.714218 12
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.365542 2020-07-21 01:13:13.714215 12
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.365539 2020-07-21 01:13:13.714213 12
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.365537 2020-07-21 01:13:13.714210 12
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.365535 2020-07-21 01:13:13.714208 12
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.365532 2020-07-21 01:13:13.714205 12
15 topology_file NaN 2020-07-21 01:13:12.365530 2020-07-21 01:13:13.714203 12
16 data_file NaN 2020-07-21 01:13:12.365527 2020-07-21 01:13:13.714200 12
17 trajectory_file NaN 2020-07-21 01:13:12.365525 2020-07-21 01:13:13.714198 12
18 energy_file NaN 2020-07-21 01:13:12.365522 2020-07-21 01:13:13.714194 12
19 log_file NaN 2020-07-21 01:13:12.365516 2020-07-21 01:13:13.714189 12

Global observables

In [125]:
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',  #{'$regex': 'GromacsEnergyMinimization'}
    "metadata.type": 'energy_file',
}
fp.filepad.count_documents(query)
Out[125]:
12
In [126]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [127]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [128]:
res_mi_list = []

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        em_df = panedr.edr_to_df(tmp.name)
        
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(),em_df.index],
            names=[*c["_id"].keys(),'step'])
        em_mi_df = em_df.set_index(mi)        
        res_mi_list.append(em_mi_df)
    print('.',end='')
print('')

res_mi_df = pd.concat(res_mi_list)
res_df = res_mi_df.reset_index()
............
In [129]:
res_mi_df
Out[129]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Pres-YX Pres-YY Pres-YZ Pres-ZX Pres-ZY Pres-ZZ #Surf*SurfTen T-rest
nmolecules step
350 0.0 0.0 106.049530 30969.919922 11162.725586 2396.903564 89897.656250 836729.812500 -3727134.50 ... 57.025841 -273.003815 -23.946796 71.850533 -23.946798 -253.279099 0.0 0.0
1.0 1.0 106.051193 30969.785156 11162.735352 2397.134277 89898.031250 833711.500000 -3727165.50 ... 56.761074 -273.126740 -24.101181 71.550865 -24.101181 -253.659668 0.0 0.0
2.0 2.0 106.080574 30969.617188 11162.742188 2397.418457 89898.500000 830116.062500 -3727202.25 ... 56.446213 -273.271484 -24.278463 71.196846 -24.278454 -254.108627 0.0 0.0
3.0 3.0 106.156960 30969.410156 11162.755859 2397.761719 89899.085938 825838.000000 -3727247.25 ... 56.070446 -273.440216 -24.481455 70.778709 -24.481472 -254.636780 0.0 0.0
4.0 4.0 106.315788 30969.125000 11162.772461 2398.181641 89899.789062 820750.562500 -3727302.75 ... 55.624382 -273.636627 -24.711657 70.288116 -24.711660 -255.255615 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
394 9994.0 9994.0 478.808197 30542.000000 11551.901367 2586.216797 101716.429688 -131985.062500 -4578958.00 ... -2.618964 -398.338196 -9.657487 15.362790 -9.657487 -397.040253 -0.0 0.0
9995.0 9995.0 481.056549 30540.406250 11551.897461 2585.313965 101715.742188 -131984.109375 -4578960.50 ... -2.619115 -398.339264 -9.657401 15.362783 -9.657397 -397.040802 -0.0 0.0
9996.0 9996.0 482.136139 30542.984375 11551.901367 2586.417236 101716.562500 -131981.953125 -4578968.00 ... -2.619285 -398.340363 -9.657289 15.362905 -9.657287 -397.041443 -0.0 0.0
9997.0 9997.0 486.106934 30540.878906 11551.895508 2585.124268 101715.578125 -131981.218750 -4578969.00 ... -2.619526 -398.341309 -9.657230 15.362894 -9.657231 -397.041718 -0.0 0.0
9999.0 9999.0 476.031311 30540.619141 11551.898438 2585.870117 101716.164062 -131980.062500 -4578973.50 ... -2.619611 -398.342010 -9.657176 15.362936 -9.657174 -397.041779 -0.0 0.0

95073 rows × 33 columns

In [130]:
y_quantities = [
    'Potential',
    'Pressure',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

positions = [
    (0,0),
    (0,1),
    (1,0),
    (2,0),
    (2,1),
]
fig, ax = plt.subplots(3,2,figsize=(10,12))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [131]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsEnergyMinimizationAfterSolvation:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [132]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[132]:
12
In [133]:
# check

aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'nmolecules': '$metadata.system.surfactant.nmolecules'
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

# for i, c in enumerate(cursor): 
#    content, metadata = fp.get_file_by_id(c["latest"])
#    nmolecules = int(c["_id"]["nmolecules"])
    

res = [ {**c['_id'], **c} for c in cursor]
columns = ['nmolecules', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [134]:
res_df
Out[134]:
nmolecules name earliest latest object_count
0 525 NaN 2020-07-21 01:13:13.714240 2020-07-21 01:13:13.714240 1
1 481 NaN 2020-07-21 01:13:13.590214 2020-07-21 01:13:13.590214 1
2 438 NaN 2020-07-21 01:13:13.467257 2020-07-21 01:13:13.467257 1
3 394 NaN 2020-07-21 01:13:13.347795 2020-07-21 01:13:13.347795 1
4 350 NaN 2020-07-21 01:13:13.228101 2020-07-21 01:13:13.228101 1
5 306 NaN 2020-07-21 01:13:13.095156 2020-07-21 01:13:13.095156 1
6 263 NaN 2020-07-21 01:13:12.954722 2020-07-21 01:13:12.954722 1
7 219 NaN 2020-07-21 01:13:12.834089 2020-07-21 01:13:12.834089 1
8 175 NaN 2020-07-21 01:13:12.720613 2020-07-21 01:13:12.720613 1
9 131 NaN 2020-07-21 01:13:12.605763 2020-07-21 01:13:12.605763 1
10 88 NaN 2020-07-21 01:13:12.484941 2020-07-21 01:13:12.484941 1
11 44 NaN 2020-07-21 01:13:12.365566 2020-07-21 01:13:12.365566 1
In [135]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # print(metadata['metadata'])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        obj_dict.update({metadata['metadata']['system']['surfactant']['nmolecules']: Video.from_file(tmp.name)})
    print('.',end='')
............
In [136]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
525
481
438
394
350
306
263
219
175
131
88
44

NVT equilibration analysis

Overview on objects in step

In [137]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [138]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[138]:
267
In [139]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [140]:
res_df
Out[140]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.378962 2020-07-21 01:13:13.729390 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.378955 2020-07-21 01:13:13.729388 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.378948 2020-07-21 01:13:13.729385 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.378942 2020-07-21 01:13:13.729383 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.378935 2020-07-21 01:13:13.729380 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.378929 2020-07-21 01:13:13.729378 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.378922 2020-07-21 01:13:13.729376 13
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.378915 2020-07-21 01:13:13.729373 13
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.378909 2020-07-21 01:13:13.729371 13
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.378902 2020-07-21 01:13:13.729368 13
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.378895 2020-07-21 01:13:13.729366 13
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.378889 2020-07-21 01:13:13.729364 13
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.378882 2020-07-21 01:13:13.729361 13
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.378875 2020-07-21 01:13:13.729359 13
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.378868 2020-07-21 01:13:13.729356 13
15 index_file NaN 2020-07-21 01:13:12.378861 2020-07-21 01:13:13.729354 13
16 topology_file NaN 2020-07-21 01:13:12.378854 2020-07-21 01:13:13.729352 13
17 data_file NaN 2020-07-21 01:13:12.378848 2020-07-21 01:13:13.729349 13
18 trajectory_file NaN 2020-07-21 01:13:12.378841 2020-07-21 01:13:13.729347 13
19 energy_file NaN 2020-07-21 01:13:12.378833 2020-07-21 01:13:13.729344 13
20 log_file NaN 2020-07-21 01:13:12.378821 2020-07-21 01:13:13.729338 13

Global observables

In [141]:
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
    "metadata.type":    'energy_file',
}
fp.filepad.count_documents(query)
Out[141]:
13
In [142]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [143]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [144]:
[ c for c in cursor]
Out[144]:
[{'_id': {'nmolecules': 306},
  'degeneracy': 2,
  'latest': '5f165d3f7dc9cfbf44a2ea4a'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f16903c7dc9cfbf44a43d45'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f1639b37dc9cfbf44a1e5df'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f1683b27dc9cfbf44a3f82d'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f163aa37dc9cfbf44a1f29d'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f163bc07dc9cfbf44a1ff51'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f166e357dc9cfbf44a35e51'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f164d8c7dc9cfbf44a28653'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f1654757dc9cfbf44a2aa69'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f1661d17dc9cfbf44a2f897'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f16721a7dc9cfbf44a38485'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f167fe07dc9cfbf44a3d2d3'}]
In [145]:
res_list = []

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        res_df = panedr.edr_to_df(tmp.name)
        
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(),res_df.index],
            names=[*c["_id"].keys(),'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.',end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()
............
In [146]:
res_df.columns
Out[146]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.',
       'Position Rest.', 'Potential', 'Kinetic En.', 'Total Energy',
       'Conserved En.', 'Temperature', 'Pressure', 'Constr. rmsd', 'Vir-XX',
       'Vir-XY', 'Vir-XZ', 'Vir-YX', 'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY',
       'Vir-ZZ', 'Pres-XX', 'Pres-XY', 'Pres-XZ', 'Pres-YX', 'Pres-YY',
       'Pres-YZ', 'Pres-ZX', 'Pres-ZY', 'Pres-ZZ', '#Surf*SurfTen',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [147]:
res_df_mi
Out[147]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
525 0.0 0.0 783.015930 40890.703125 15882.089844 3592.712158 135596.328125 -152611.906250 -4587028.00 ... -4133381.00 477655.56250 0.0 0.0 298.593903 0.0 1.0 1.0
1.0 1.0 6907.512207 70606.929688 19269.679688 4622.277344 134504.187500 -205344.859375 -4144421.25 ... -3685390.75 421481.31250 0.0 0.0 206.871918 0.0 1.0 1.0
2.0 2.0 7911.669922 76077.703125 19949.109375 5150.765137 134709.843750 -225665.906250 -4045137.25 ... -3581715.00 400387.68750 0.0 0.0 237.620514 0.0 1.0 1.0
3.0 3.0 8569.228516 79897.421875 20112.382812 5052.904785 134344.406250 -243986.187500 -3969531.50 ... -3503647.75 381790.15625 0.0 0.0 256.506989 0.0 1.0 1.0
4.0 4.0 9247.992188 81979.546875 20468.628906 5238.154297 134049.062500 -262460.312500 -3907415.75 ... -3439932.50 363504.96875 0.0 0.0 268.449738 0.0 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
175 96.0 96.0 3109.290039 27439.615234 6442.884277 1676.433350 44899.921875 -268570.968750 -3638488.25 ... -3468710.25 344307.25000 0.0 0.0 299.187256 0.0 1.0 1.0
97.0 97.0 2967.511475 27295.527344 6436.663574 1702.368286 44909.246094 -268249.593750 -3639971.00 ... -3470224.50 344559.75000 0.0 0.0 298.442017 0.0 1.0 1.0
98.0 98.0 3004.910400 27397.394531 6368.225586 1700.576782 44766.316406 -265549.937500 -3640810.25 ... -3471301.50 347079.31250 0.0 0.0 296.464905 0.0 1.0 1.0
99.0 99.0 3117.757324 27014.894531 6452.667480 1762.188354 44937.621094 -267507.687500 -3640443.50 ... -3470714.00 345275.46875 0.0 0.0 297.777740 0.0 1.0 1.0
100.0 100.0 2942.428955 27784.935547 6396.532715 1638.451782 44793.921875 -265345.781250 -3641945.25 ... -3472599.00 347461.12500 0.0 0.0 297.373413 0.0 1.0 1.0

1212 rows × 80 columns

In [148]:
#n = len(res_df['nmolecules'].unique())
y_quantities = [
    'Temperature',
    'Pressure',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [149]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNVTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [150]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[150]:
12
In [151]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # print(metadata['metadata'])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [152]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
(525,)
(481,)
(438,)
(394,)
(350,)
(306,)
(263,)
(219,)
(175,)
(131,)
(88,)
(44,)

NPT equilibration analysis

Datasets in step

In [153]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [154]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'dataset': '$metadata.step_specific.dtool_push.remote_dataset',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id']['dataset'], **c} for c in cursor]
columns = ['uuid', 'name', 'uri', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [155]:
res_df
Out[155]:
uuid name uri earliest latest object_count
0 71a46b71-f815-4c3b-8671-01eefa3efdae 2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration 2020-07-21 01:13:13.743269 2020-07-21 01:13:13.743321 21
1 f2919ee2-ba4f-47d9-b1b6-a7d7dd16680c 2020-07-21-01-13-13-610584-n-481-m-481-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-610584-n-481-m-481-gromacsnptequilibration 2020-07-21 01:13:13.616543 2020-07-21 01:13:13.616607 21
2 70b89243-3b84-4ae2-b2a6-c97acc07a8b8 2020-07-21-01-13-13-489638-n-438-m-438-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-489638-n-438-m-438-gromacsnptequilibration 2020-07-21 01:13:13.498771 2020-07-21 01:13:13.498824 21
3 cdea2f6a-d027-419d-85e9-7337a4d8cdb3 2020-07-21-01-13-13-368152-n-394-m-394-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-368152-n-394-m-394-gromacsnptequilibration 2020-07-21 01:13:13.373890 2020-07-21 01:13:13.373943 21
4 53c2d3bb-4db1-47be-9651-2dab3fd76f18 2020-07-21-01-13-13-246509-n-350-m-350-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-246509-n-350-m-350-gromacsnptequilibration 2020-07-21 01:13:13.252270 2020-07-21 01:13:13.252321 21
5 902c7c8b-ad91-4e19-a146-98593a8ca541 2020-07-21-01-13-13-126952-n-306-m-306-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-126952-n-306-m-306-gromacsnptequilibration 2020-07-21 01:13:13.133684 2020-07-21 01:13:13.133737 21
6 0c07311d-ba39-43b0-91f3-395f9acd1d72 2020-07-21-01-13-12-975341-n-263-m-263-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-975341-n-263-m-263-gromacsnptequilibration 2020-07-21 01:13:12.982317 2020-07-21 01:13:12.982371 21
7 2c309f81-c757-478e-a8f1-0a129c9bd1dd 2020-07-21-01-13-12-854269-n-219-m-219-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-854269-n-219-m-219-gromacsnptequilibration 2020-07-21 01:13:12.860518 2020-07-21 01:13:12.860571 21
8 6674851e-222d-42d4-b9d6-9c86332cadd8 2020-07-21-01-13-12-738848-n-175-m-175-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-738848-n-175-m-175-gromacsnptequilibration 2020-07-21 01:13:12.744915 2020-07-21 01:13:12.744970 21
9 102deb24-224e-46bf-9e65-e1735bd3b754 2020-07-21-01-13-12-624372-n-131-m-131-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-624372-n-131-m-131-gromacsnptequilibration 2020-07-21 01:13:12.630556 2020-07-21 01:13:12.630611 21
10 29ddbd49-e5e4-4731-a757-8e43d261a397 2020-07-21-01-13-12-508896-n-88-m-88-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-508896-n-88-m-88-gromacsnptequilibration 2020-07-21 01:13:12.514976 2020-07-21 01:13:12.515029 21
11 ae7b32fd-ef3e-411f-b958-9521cb267731 2020-07-21-01-13-12-384927-n-44-m-44-gromacsnptequilibration file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-384927-n-44-m-44-gromacsnptequilibration 2020-07-21 01:13:12.391877 2020-07-21 01:13:12.391930 21

Overview on objects in step

In [156]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad'
}
In [157]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[157]:
252
In [158]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [159]:
res_df
Out[159]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.391930 2020-07-21 01:13:13.743321 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.391927 2020-07-21 01:13:13.743318 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.391925 2020-07-21 01:13:13.743316 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.391922 2020-07-21 01:13:13.743313 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.391920 2020-07-21 01:13:13.743311 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.391917 2020-07-21 01:13:13.743309 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.391915 2020-07-21 01:13:13.743306 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.391912 2020-07-21 01:13:13.743304 12
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.391910 2020-07-21 01:13:13.743302 12
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.391908 2020-07-21 01:13:13.743299 12
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.391905 2020-07-21 01:13:13.743297 12
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.391903 2020-07-21 01:13:13.743295 12
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.391900 2020-07-21 01:13:13.743292 12
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.391898 2020-07-21 01:13:13.743290 12
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.391896 2020-07-21 01:13:13.743288 12
15 index_file NaN 2020-07-21 01:13:12.391893 2020-07-21 01:13:13.743285 12
16 topology_file NaN 2020-07-21 01:13:12.391891 2020-07-21 01:13:13.743283 12
17 data_file NaN 2020-07-21 01:13:12.391888 2020-07-21 01:13:13.743280 12
18 trajectory_file NaN 2020-07-21 01:13:12.391886 2020-07-21 01:13:13.743278 12
19 energy_file NaN 2020-07-21 01:13:12.391883 2020-07-21 01:13:13.743275 12
20 log_file NaN 2020-07-21 01:13:12.391877 2020-07-21 01:13:13.743269 12

Global observables

In [160]:
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
    "metadata.type":    'energy_file',
}
fp.filepad.count_documents(query)
Out[160]:
12
In [161]:
metadata = fp.filepad.find_one(query)
In [162]:
metadata
Out[162]:
{'_id': ObjectId('5f163f547dc9cfbf44a20c08'),
 'identifier': '/p/scratch/chfr13/hoermann/fireworks/launchpad/launcher_2020-07-21-01-05-21-471340/default.edr',
 'original_file_name': 'default.edr',
 'original_file_path': '/p/scratch/chfr13/hoermann/fireworks/launchpad/launcher_2020-07-21-01-05-21-471340/default.edr',
 'metadata': {'project': '2020-07-21-passivation-trial',
  'datetime': '2020-07-21 01:13:13.743275',
  'step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
  'description': 'Parametric trial runs for indenter passivation',
  'owners': [{'name': 'Johannes Laurin Hörmann',
    'email': 'johannes.hoermann@imtek.uni-freiburg.de',
    'username': 'fr_jh1130',
    'orcid': '0000-0001-5867-695X'}],
  'machine': 'juwels_devel',
  'parameter_label_key_dict': {'n': 'system->surfactant->nmolecules',
   'm': 'system->counterion->nmolecules'},
  'parameter_values': [{'n': 44, 'm': 44},
   {'n': 88, 'm': 88},
   {'n': 131, 'm': 131},
   {'n': 175, 'm': 175},
   {'n': 219, 'm': 219},
   {'n': 263, 'm': 263},
   {'n': 306, 'm': 306},
   {'n': 350, 'm': 350},
   {'n': 394, 'm': 394},
   {'n': 438, 'm': 438},
   {'n': 481, 'm': 481},
   {'n': 525, 'm': 525}],
  'mode': 'trial',
  'system': {'counterion': {'name': 'NA',
    'resname': 'NA',
    'nmolecules': 525,
    'reference_atom': {'name': 'NA'}},
   'surfactant': {'name': 'SDS',
    'resname': 'SDS',
    'nmolecules': 525,
    'connector_atom': {'index': 2, 'position': [3.47, 0.7, 20.88]},
    'head_atom': {'name': 'S'},
    'tail_atom': {'name': 'C12'},
    'bounding_sphere': {'center': [1.885, 0.545, 15.47],
     'radius': 7.758430898061799,
     'radius_connector_atom': 5.639534555262516},
    'head_group': {'diameter': 2.1188963427992826}},
   'substrate': {'name': 'AUM',
    'resname': 'AUM',
    'natoms': 3873,
    'reference_atom': {'name': 'AU'}},
   'solvent': {'name': 'H2O',
    'resname': 'SOL',
    'reference_atom': {'name': 'OW'}},
   'indenter': {'bounding_sphere': {'center': [-0.02151743007486928,
      -0.17756268039515888,
      0.46946045857874186],
     'radius': 26.390609083217864}}},
  'step_specific': {'packing': {'surfactant_indenter': {'outer_atom_index': 1,
     'inner_atom_index': 39,
     'tolerance': 2,
     'constraints': {'R_outer': 45.90747087934146,
      'R_outer_constraint': 43.90747087934146,
      'R_inner_constraint': 30.509505426017146,
      'R_inner': 28.390609083217864}}},
   'pulling': {'pull_atom_name': 'C12',
    'spring_constant': 10000,
    'rate': -0.1,
    'nsteps': 1000},
   'dtool_push': {'dtool_target': '/p/project/chfr13/hoermann4/dtool/DATASETS',
    'remote_dataset': {'uri': 'file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration',
     'uuid': '71a46b71-f815-4c3b-8671-01eefa3efdae',
     'name': '2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration'},
    'local_proto_dataset': {'uri': 'file://juwels01.ib.juwels.fzj.de/p/scratch/chfr13/hoermann/fireworks/launchpad/launcher_2020-07-21-01-05-03-680344/2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration',
     'uuid': '71a46b71-f815-4c3b-8671-01eefa3efdae',
     'name': '2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration'},
    'local_frozen_dataset': {'uri': 'file://juwels01.ib.juwels.fzj.de/p/scratch/chfr13/hoermann/fireworks/launchpad/launcher_2020-07-21-01-05-03-680344/2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration',
     'uuid': '71a46b71-f815-4c3b-8671-01eefa3efdae',
     'name': '2020-07-21-01-13-13-736370-n-525-m-525-gromacsnptequilibration'}}},
  'project_id': '2020-07-21-passivation-trial',
  'creation_date': '2020-07-21 01:12:45.402090',
  'expiration_date': '2022-07-21 01:12:45.402090',
  'type': 'energy_file'},
 'compressed': True,
 'gfs_id': '5f163f547dc9cfbf44a20c06'}
In [163]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [164]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [165]:
[ c for c in cursor]
Out[165]:
[{'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f165a437dc9cfbf44a2be2e'},
 {'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f163f547dc9cfbf44a20c06'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f16675b7dc9cfbf44a3326e'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f16404e7dc9cfbf44a21f30'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f1664077dc9cfbf44a31e3e'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f16739d7dc9cfbf44a39206'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f1677d37dc9cfbf44a3a04d'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f1685257dc9cfbf44a40545'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f16895f7dc9cfbf44a411e7'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f1695b27dc9cfbf44a4620d'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f16414e7dc9cfbf44a22cd5'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f1653627dc9cfbf44a29da0'}]
In [166]:
res_list = []

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        res_df = panedr.edr_to_df(tmp.name)
        
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(),res_df.index],
            names=[*c["_id"].keys(),'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.',end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()
............
In [167]:
res_df.columns
Out[167]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.',
       'Position Rest.', 'Potential', 'Kinetic En.', 'Total Energy',
       'Conserved En.', 'Temperature', 'Pressure', 'Constr. rmsd', 'Box-X',
       'Box-Y', 'Box-Z', 'Volume', 'Density', 'pV', 'Enthalpy', 'Vir-XX',
       'Vir-XY', 'Vir-XZ', 'Vir-YX', 'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY',
       'Vir-ZZ', 'Pres-XX', 'Pres-XY', 'Pres-XZ', 'Pres-YX', 'Pres-YY',
       'Pres-YZ', 'Pres-ZX', 'Pres-ZY', 'Pres-ZZ', '#Surf*SurfTen',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [168]:
res_df_mi
Out[168]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
306 0.0 0.0 5325.992676 47985.746094 11606.020508 3002.746826 78007.468750 -280553.81250 -3672588.50 ... -3389411.50 338140.34375 0.0 0.0 299.376801 1.706116 1.0 1.0
1.0 1.0 5064.854492 47531.800781 11415.012695 3003.728760 78207.984375 -318681.56250 -3668624.75 ... -3381978.25 337497.15625 0.0 0.0 297.617889 298.927551 1.0 1.0
2.0 2.0 5348.880371 47413.351562 11541.583008 3216.615479 78160.101562 -319668.31250 -3678709.75 ... -3393210.25 341217.00000 0.0 0.0 297.264893 178.977844 1.0 1.0
3.0 3.0 5223.317871 47331.582031 11387.572266 3029.078125 78118.992188 -323640.03125 -3678432.25 ... -3391972.00 339620.65625 0.0 0.0 298.663605 122.084465 1.0 1.0
4.0 4.0 5442.667969 47610.261719 11536.816406 3017.707031 78092.875000 -323507.34375 -3680309.50 ... -3394937.75 342139.40625 0.0 0.0 297.389465 88.929482 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
44 96.0 96.0 686.234436 6744.473145 1549.529541 380.422516 11241.458008 -299507.09375 -3592958.25 ... -3545980.25 356860.31250 0.0 0.0 296.285767 43.927448 1.0 1.0
97.0 97.0 718.663269 6564.708008 1520.251953 434.862701 11267.512695 -298329.12500 -3596315.75 ... -3549566.50 357762.53125 0.0 0.0 297.457886 42.531563 1.0 1.0
98.0 98.0 747.969116 6651.683594 1554.880127 392.532532 11219.794922 -298274.78125 -3595212.00 ... -3547983.50 357772.53125 0.0 0.0 295.978271 43.907619 1.0 1.0
99.0 99.0 674.302551 6480.134277 1570.608276 429.110443 11294.245117 -300619.25000 -3590330.50 ... -3543363.50 355433.78125 0.0 0.0 296.465057 43.936378 1.0 1.0
100.0 100.0 712.136780 6444.956055 1589.672607 443.459991 11324.289062 -297933.50000 -3595668.75 ... -3548920.50 358164.68750 0.0 0.0 296.528442 42.908562 1.0 1.0

1212 rows × 87 columns

In [169]:
#n = len(res_df['nmolecules'].unique())
y_quantities = [
    'Temperature',
    'Pressure',
    'Volume',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [170]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [171]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[171]:
12
In [172]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # print(metadata['metadata'])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [173]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
(525,)
(481,)
(438,)
(394,)
(350,)
(306,)
(263,)
(219,)
(175,)
(131,)
(88,)
(44,)

Pre-evaluated RDF

Overview

In [174]:
query = { 
    "metadata.project": project_id,
    "metadata.type": {'$regex': '.*rdf$'},
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[174]:
120
In [175]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [176]:
res_df
Out[176]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-21 01:13:12.391917 2020-07-21 01:13:13.743309 12
1 surfactant_head_surfactant_tail_rdf 2020-07-21 01:13:12.391915 2020-07-21 01:13:13.743306 12
2 surfactant_head_surfactant_head_rdf 2020-07-21 01:13:12.391912 2020-07-21 01:13:13.743304 12
3 substrate_surfactant_tail_rdf 2020-07-21 01:13:12.391910 2020-07-21 01:13:13.743302 12
4 substrate_surfactant_head_rdf 2020-07-21 01:13:12.391908 2020-07-21 01:13:13.743299 12
5 substrate_substrate_rdf 2020-07-21 01:13:12.391905 2020-07-21 01:13:13.743297 12
6 counterion_surfactant_tail_rdf 2020-07-21 01:13:12.391903 2020-07-21 01:13:13.743295 12
7 counterion_surfactant_head_rdf 2020-07-21 01:13:12.391900 2020-07-21 01:13:13.743292 12
8 counterion_substrate_rdf 2020-07-21 01:13:12.391898 2020-07-21 01:13:13.743290 12
9 counterion_counterion_rdf 2020-07-21 01:13:12.391896 2020-07-21 01:13:13.743288 12

Substrate - surfactant head RDF

In [177]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [178]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[178]:
12
In [179]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [180]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - surfactant tail RDF

In [181]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [182]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[182]:
12
In [183]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [184]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
    
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - surfactant tail RDF

In [185]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [186]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[186]:
12
In [187]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [188]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - substrate RDF

In [189]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [190]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_substrate_rdf',
    "metadata.step": "GromacsNPTEquilibration:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[190]:
12
In [191]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [192]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
cutoff_index_inner = 1
cutoff_index_outer = -1
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][0][cutoff_index_inner:cutoff_index_outer], label='First frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][len(data)//2][cutoff_index_inner:cutoff_index_outer],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][-1][cutoff_index_inner:cutoff_index_outer],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Relaxation analysis

Datasets in step

In [193]:
# queries to the data base are simple dictionaries
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad'
}
In [194]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'dataset': '$metadata.step_specific.dtool_push.remote_dataset',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id']['dataset'], **c} for c in cursor]
columns = ['uuid', 'name', 'uri', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [195]:
res_df
Out[195]:
uuid name uri earliest latest object_count
0 a0822e65-a0e5-45f5-8744-3227b09ce877 2020-07-21-01-13-13-750539-n-525-m-525-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-750539-n-525-m-525-gromacsrelaxation 2020-07-21 01:13:13.756693 2020-07-21 01:13:13.756743 21
1 268dea24-811a-4159-b47b-40f6ab74f0e6 2020-07-21-01-13-13-623594-n-481-m-481-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-623594-n-481-m-481-gromacsrelaxation 2020-07-21 01:13:13.629738 2020-07-21 01:13:13.629793 21
2 960fe805-a615-4600-82ff-a99977fddc78 2020-07-21-01-13-13-504608-n-438-m-438-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-504608-n-438-m-438-gromacsrelaxation 2020-07-21 01:13:13.510923 2020-07-21 01:13:13.511002 21
3 053386b4-1631-4714-9ca5-f335a1081c3d 2020-07-21-01-13-13-381288-n-394-m-394-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-381288-n-394-m-394-gromacsrelaxation 2020-07-21 01:13:13.387565 2020-07-21 01:13:13.387620 21
4 cefa70fb-6767-459e-9edb-5e49ef7b340e 2020-07-21-01-13-13-259185-n-350-m-350-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-259185-n-350-m-350-gromacsrelaxation 2020-07-21 01:13:13.264831 2020-07-21 01:13:13.264881 21
5 b3f6c4fd-55ec-43c0-914a-4cc0dfbe6309 2020-07-21-01-13-13-140608-n-306-m-306-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-13-140608-n-306-m-306-gromacsrelaxation 2020-07-21 01:13:13.147043 2020-07-21 01:13:13.147095 21
6 41b27d95-1e58-48c4-8ba9-35f5a6b2ef25 2020-07-21-01-13-12-990048-n-263-m-263-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-990048-n-263-m-263-gromacsrelaxation 2020-07-21 01:13:12.997202 2020-07-21 01:13:12.997258 21
7 ece461b3-466e-4eca-a028-4bce74a4ff8c 2020-07-21-01-13-12-867252-n-219-m-219-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-867252-n-219-m-219-gromacsrelaxation 2020-07-21 01:13:12.873911 2020-07-21 01:13:12.873966 21
8 fdcb613e-5a26-4cd4-8a2d-c91e624f25ae 2020-07-21-01-13-12-751002-n-175-m-175-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-751002-n-175-m-175-gromacsrelaxation 2020-07-21 01:13:12.757246 2020-07-21 01:13:12.757301 21
9 47f93833-95b6-482f-aa8a-b0f41155382a 2020-07-21-01-13-12-637269-n-131-m-131-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-637269-n-131-m-131-gromacsrelaxation 2020-07-21 01:13:12.643612 2020-07-21 01:13:12.643665 21
10 9269d5d5-93a4-4988-b7e4-8802e273728f 2020-07-21-01-13-12-521063-n-88-m-88-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-521063-n-88-m-88-gromacsrelaxation 2020-07-21 01:13:12.527302 2020-07-21 01:13:12.527356 21
11 05ae33bc-5585-49eb-990d-0e45a52e0a8c 2020-07-21-01-13-12-397569-n-44-m-44-gromacsrelaxation file://juwels01.ib.juwels.fzj.de/p/project/chfr13/hoermann4/dtool/DATASETS/2020-07-21-01-13-12-397569-n-44-m-44-gromacsrelaxation 2020-07-21 01:13:12.403514 2020-07-21 01:13:12.403569 21

Overview on objects in step

In [196]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[196]:
252
In [197]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
                'name': '$metadata.name',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'name', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [198]:
res_df
Out[198]:
type name earliest latest object_count
0 mp4_file NaN 2020-07-21 01:13:12.403569 2020-07-21 01:13:13.756743 12
1 surfactant_tail_rmsd NaN 2020-07-21 01:13:12.403567 2020-07-21 01:13:13.756741 12
2 surfactant_head_rmsd NaN 2020-07-21 01:13:12.403564 2020-07-21 01:13:13.756738 12
3 substrate_rmsd NaN 2020-07-21 01:13:12.403561 2020-07-21 01:13:13.756736 12
4 counterion_rmsd NaN 2020-07-21 01:13:12.403559 2020-07-21 01:13:13.756734 12
5 surfactant_tail_surfactant_tail_rdf NaN 2020-07-21 01:13:12.403556 2020-07-21 01:13:13.756731 12
6 surfactant_head_surfactant_tail_rdf NaN 2020-07-21 01:13:12.403554 2020-07-21 01:13:13.756729 12
7 surfactant_head_surfactant_head_rdf NaN 2020-07-21 01:13:12.403551 2020-07-21 01:13:13.756727 12
8 substrate_surfactant_tail_rdf NaN 2020-07-21 01:13:12.403549 2020-07-21 01:13:13.756725 12
9 substrate_surfactant_head_rdf NaN 2020-07-21 01:13:12.403546 2020-07-21 01:13:13.756722 12
10 substrate_substrate_rdf NaN 2020-07-21 01:13:12.403544 2020-07-21 01:13:13.756720 12
11 counterion_surfactant_tail_rdf NaN 2020-07-21 01:13:12.403541 2020-07-21 01:13:13.756718 12
12 counterion_surfactant_head_rdf NaN 2020-07-21 01:13:12.403539 2020-07-21 01:13:13.756716 12
13 counterion_substrate_rdf NaN 2020-07-21 01:13:12.403536 2020-07-21 01:13:13.756713 12
14 counterion_counterion_rdf NaN 2020-07-21 01:13:12.403534 2020-07-21 01:13:13.756711 12
15 index_file NaN 2020-07-21 01:13:12.403531 2020-07-21 01:13:13.756709 12
16 topology_file NaN 2020-07-21 01:13:12.403529 2020-07-21 01:13:13.756706 12
17 data_file NaN 2020-07-21 01:13:12.403526 2020-07-21 01:13:13.756704 12
18 trajectory_file NaN 2020-07-21 01:13:12.403524 2020-07-21 01:13:13.756702 12
19 energy_file NaN 2020-07-21 01:13:12.403521 2020-07-21 01:13:13.756699 12
20 log_file NaN 2020-07-21 01:13:12.403514 2020-07-21 01:13:13.756693 12

Global observables

In [199]:
query = { 
    "metadata.project": project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
    "metadata.type":    'energy_file',
}
fp.filepad.count_documents(query)
Out[199]:
12
In [200]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [201]:
aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)
In [202]:
[ c for c in cursor]
Out[202]:
[{'_id': {'nmolecules': 525},
  'degeneracy': 1,
  'latest': '5f1649aa7dc9cfbf44a23b2a'},
 {'_id': {'nmolecules': 394},
  'degeneracy': 1,
  'latest': '5f165c1c7dc9cfbf44a2d143'},
 {'_id': {'nmolecules': 481},
  'degeneracy': 1,
  'latest': '5f164b077dc9cfbf44a25445'},
 {'_id': {'nmolecules': 350},
  'degeneracy': 1,
  'latest': '5f1662ee7dc9cfbf44a30542'},
 {'_id': {'nmolecules': 263},
  'degeneracy': 1,
  'latest': '5f166fe67dc9cfbf44a36b92'},
 {'_id': {'nmolecules': 306},
  'degeneracy': 1,
  'latest': '5f166b7b7dc9cfbf44a34573'},
 {'_id': {'nmolecules': 219},
  'degeneracy': 1,
  'latest': '5f167c137dc9cfbf44a3b9e9'},
 {'_id': {'nmolecules': 131},
  'degeneracy': 1,
  'latest': '5f168d8a7dc9cfbf44a4248a'},
 {'_id': {'nmolecules': 88},
  'degeneracy': 1,
  'latest': '5f16922a7dc9cfbf44a449a3'},
 {'_id': {'nmolecules': 44},
  'degeneracy': 1,
  'latest': '5f169e197dc9cfbf44a46e80'},
 {'_id': {'nmolecules': 438},
  'degeneracy': 1,
  'latest': '5f164c4a7dc9cfbf44a26d4a'},
 {'_id': {'nmolecules': 175},
  'degeneracy': 1,
  'latest': '5f1680ad7dc9cfbf44a3df61'}]
In [203]:
res_list = []

cursor = fp.filepad.aggregate(aggregation_pipeline)

for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    with tempfile.NamedTemporaryFile(suffix='.edr') as tmp:
        tmp.write(content)
        res_df = panedr.edr_to_df(tmp.name)
        
        mi = pd.MultiIndex.from_product(
            [c["_id"].values(),res_df.index],
            names=[*c["_id"].keys(),'step'])
        res_mi_df = res_df.set_index(mi)
        res_list.append(res_mi_df)
    print('.',end='')
print('')
res_df_mi = pd.concat(res_list)
res_df = res_df_mi.reset_index()
............
In [204]:
res_df.columns
Out[204]:
Index(['nmolecules', 'step', 'Time', 'Bond', 'U-B', 'Proper Dih.', 'LJ-14',
       'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)', 'Coul. recip.',
       'Position Rest.', 'Potential', 'Kinetic En.', 'Total Energy',
       'Conserved En.', 'Temperature', 'Pressure', 'Constr. rmsd', 'Box-X',
       'Box-Y', 'Box-Z', 'Volume', 'Density', 'pV', 'Enthalpy', 'Vir-XX',
       'Vir-XY', 'Vir-XZ', 'Vir-YX', 'Vir-YY', 'Vir-YZ', 'Vir-ZX', 'Vir-ZY',
       'Vir-ZZ', 'Pres-XX', 'Pres-XY', 'Pres-XZ', 'Pres-YX', 'Pres-YY',
       'Pres-YZ', 'Pres-ZX', 'Pres-ZY', 'Pres-ZZ', '#Surf*SurfTen',
       'Box-Vel-XX', 'Box-Vel-YY', 'Box-Vel-ZZ',
       'Coul-SR:Surfactant-Surfactant', 'LJ-SR:Surfactant-Surfactant',
       'Coul-14:Surfactant-Surfactant', 'LJ-14:Surfactant-Surfactant',
       'Coul-SR:Surfactant-Substrate', 'LJ-SR:Surfactant-Substrate',
       'Coul-14:Surfactant-Substrate', 'LJ-14:Surfactant-Substrate',
       'Coul-SR:Surfactant-Ion', 'LJ-SR:Surfactant-Ion',
       'Coul-14:Surfactant-Ion', 'LJ-14:Surfactant-Ion',
       'Coul-SR:Surfactant-rest', 'LJ-SR:Surfactant-rest',
       'Coul-14:Surfactant-rest', 'LJ-14:Surfactant-rest',
       'Coul-SR:Substrate-Substrate', 'LJ-SR:Substrate-Substrate',
       'Coul-14:Substrate-Substrate', 'LJ-14:Substrate-Substrate',
       'Coul-SR:Substrate-Ion', 'LJ-SR:Substrate-Ion', 'Coul-14:Substrate-Ion',
       'LJ-14:Substrate-Ion', 'Coul-SR:Substrate-rest', 'LJ-SR:Substrate-rest',
       'Coul-14:Substrate-rest', 'LJ-14:Substrate-rest', 'Coul-SR:Ion-Ion',
       'LJ-SR:Ion-Ion', 'Coul-14:Ion-Ion', 'LJ-14:Ion-Ion', 'Coul-SR:Ion-rest',
       'LJ-SR:Ion-rest', 'Coul-14:Ion-rest', 'LJ-14:Ion-rest',
       'Coul-SR:rest-rest', 'LJ-SR:rest-rest', 'Coul-14:rest-rest',
       'LJ-14:rest-rest', 'T-non-Substrate', 'T-Substrate',
       'Lamb-non-Substrate', 'Lamb-Substrate'],
      dtype='object')
In [205]:
res_df_mi
Out[205]:
Time Bond U-B Proper Dih. LJ-14 Coulomb-14 LJ (SR) Coulomb (SR) ... Coul-SR:rest-rest LJ-SR:rest-rest Coul-14:rest-rest LJ-14:rest-rest T-non-Substrate T-Substrate Lamb-non-Substrate Lamb-Substrate
nmolecules step
44 0.0 0.0 727.516357 6460.637695 1589.852173 442.834778 11321.890625 -297654.65625 -3595528.25 ... -3548792.50 358401.62500 0.0 0.0 297.732666 42.913223 1.0 1.0
1.0 1.0 637.103760 6680.111816 1664.305664 436.721252 11223.439453 -299945.03125 -3591431.00 ... -3544092.00 356309.84375 0.0 0.0 296.206055 43.141201 1.0 1.0
2.0 2.0 642.045593 6645.769043 1640.131836 433.048035 11230.026367 -293239.03125 -3597814.50 ... -3551162.75 363227.90625 0.0 0.0 296.402557 43.175102 1.0 1.0
3.0 3.0 689.675598 6495.912109 1608.256226 447.823822 11255.919922 -296287.34375 -3595840.25 ... -3548502.50 359893.12500 0.0 0.0 296.442810 44.660847 1.0 1.0
4.0 4.0 690.900574 6662.355469 1624.773682 440.674133 11202.725586 -298627.62500 -3593248.75 ... -3545816.75 357717.09375 0.0 0.0 296.806183 44.131638 1.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88 196.0 196.0 1407.950073 13195.257812 3294.906006 900.900635 21675.939453 -305988.68750 -3603197.25 ... -3500001.75 352925.18750 0.0 0.0 296.846649 43.653877 1.0 1.0
197.0 197.0 1408.531982 13111.499023 3227.258789 865.946228 21715.794922 -304492.09375 -3605858.75 ... -3502690.25 354329.21875 0.0 0.0 297.602112 44.336353 1.0 1.0
198.0 198.0 1408.231567 13204.254883 3365.327881 855.522583 21777.230469 -304386.93750 -3605296.50 ... -3501683.25 354172.06250 0.0 0.0 297.060028 43.724331 1.0 1.0
199.0 199.0 1368.782837 13244.612305 3242.871582 830.326782 21635.414062 -303648.37500 -3604465.00 ... -3500951.50 355047.65625 0.0 0.0 296.349640 45.628654 1.0 1.0
200.0 200.0 1432.058594 13058.166992 3266.754395 802.775940 21658.347656 -305025.87500 -3602718.25 ... -3498495.50 353472.28125 0.0 0.0 296.858856 43.783363 1.0 1.0

2412 rows × 90 columns

In [206]:
#n = len(res_df['nmolecules'].unique())
y_quantities = [
    'Temperature',
    'Pressure',
    'Volume',
    'Potential',
    'Bond',
    'Coulomb (SR)',
    'Coul. recip.',
    ]

n = len(y_quantities)
cols = 2
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
for key, grp in res_df.groupby(['nmolecules']):
    for y_quantity, position in zip(y_quantities, positions):
        grp.plot('Time',y_quantity,ax=ax[position],label=key,title=y_quantity)
        
fig.tight_layout()

Visualize trajectory

In [266]:
query = {
    'metadata.project': project_id,
    'metadata.step': 'GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad',
    'metadata.type': 'mp4_file',
}
In [267]:
# use underlying MongoDB functionality to check total number of documents matching query
fp.filepad.count_documents(query)
Out[267]:
12
In [268]:
parameter_dict = {'nmolecules': 'metadata.system.surfactant.nmolecules'}

aggregation_pipeline = [
    {
        "$match": query
    },
    {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    },
    { 
        "$group": { 
            "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
            "degeneracy": {"$sum": 1}, # number matching data sets
            "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
        }
    },
    {
        "$sort": { 
            "_id.nmolecules": pymongo.DESCENDING,
        }
    },
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

obj_dict = {}
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    # print(metadata['metadata'])
    with tempfile.NamedTemporaryFile(suffix='.mp4',delete=False) as tmp:
        tmp.write(content)
        # obj_list.append(Video(filename=tmp.name)) 
        # obj_list.append(tmp.name)
        key = tuple(c["_id"].values())
        obj_dict.update({key: Video.from_file(tmp.name)})
    print('.',end='')
............
In [269]:
for key, obj in obj_dict.items():
    print(key)
    display(obj)
(525,)
(481,)
(438,)
(394,)
(350,)
(306,)
(263,)
(219,)
(175,)
(131,)
(88,)
(44,)

Pre-evaluated RDF

Overview

In [211]:
query = { 
    "metadata.project": project_id,
    "metadata.type": {'$regex': '.*rdf$'},
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[211]:
120
In [212]:
# check files degenerate by 'metadata.type' ad 'metadata.name'
aggregation_pipeline = [
    {
        "$match": query
    },
    {  # group by unique project id
        "$group": { 
            "_id": { 
                'type': '$metadata.type',
            },
            "object_count": {"$sum": 1}, # count matching data sets
            "earliest":  {'$min': '$metadata.datetime' },
            "latest":  {'$max': '$metadata.datetime' },
        },
    },
    {  # sort by earliest date, descending
        "$sort": { 
            "earliest": pymongo.DESCENDING,
        }
    }
]

cursor = fp.filepad.aggregate(aggregation_pipeline)

res = [ {**c['_id'], **c} for c in cursor]
columns = ['type', 'earliest', 'latest', 'object_count', '_id']
res_df = pd.DataFrame(data=res, columns=columns) # pandas Dataframe is just nice for printing in notebook
del res_df["_id"]
In [213]:
res_df
Out[213]:
type earliest latest object_count
0 surfactant_tail_surfactant_tail_rdf 2020-07-21 01:13:12.403556 2020-07-21 01:13:13.756731 12
1 surfactant_head_surfactant_tail_rdf 2020-07-21 01:13:12.403554 2020-07-21 01:13:13.756729 12
2 surfactant_head_surfactant_head_rdf 2020-07-21 01:13:12.403551 2020-07-21 01:13:13.756727 12
3 substrate_surfactant_tail_rdf 2020-07-21 01:13:12.403549 2020-07-21 01:13:13.756725 12
4 substrate_surfactant_head_rdf 2020-07-21 01:13:12.403546 2020-07-21 01:13:13.756722 12
5 substrate_substrate_rdf 2020-07-21 01:13:12.403544 2020-07-21 01:13:13.756720 12
6 counterion_surfactant_tail_rdf 2020-07-21 01:13:12.403541 2020-07-21 01:13:13.756718 12
7 counterion_surfactant_head_rdf 2020-07-21 01:13:12.403539 2020-07-21 01:13:13.756716 12
8 counterion_substrate_rdf 2020-07-21 01:13:12.403536 2020-07-21 01:13:13.756713 12
9 counterion_counterion_rdf 2020-07-21 01:13:12.403534 2020-07-21 01:13:13.756711 12

Substrate - surfactant head RDF

In [214]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [215]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_head_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[215]:
12
In [216]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
aggregation_pipeline = [ match_aggregation, sort_aggregation, group_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [217]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - surfactant tail RDF

In [218]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [219]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_surfactant_tail_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[219]:
12
In [220]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [221]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
    
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - surfactant tail RDF

In [222]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [223]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_tail_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[223]:
12
In [224]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [225]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - counterion RDF

In [226]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [227]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'counterion_surfactant_head_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[227]:
12
In [228]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [229]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'],data['rdf'][0], label='First frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][len(data)//2],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'],data['rdf'][-1],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant head - surfactant head RDF

In [230]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [231]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_head_surfactant_head_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[231]:
12
In [232]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [233]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
cutoff_index_inner = 1
cutoff_index_outer = -1
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'][cutoff_index_inner:],data['rdf'][0][cutoff_index_inner:], label='First frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:],data['rdf'][len(data)//2][cutoff_index_inner:],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:],data['rdf'][-1][cutoff_index_inner:],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Surfactant tail - surfactant tail RDF

In [234]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [235]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'surfactant_tail_surfactant_tail_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[235]:
12
In [236]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [237]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
cutoff_index_inner = 1
cutoff_index_outer = -1
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][0][cutoff_index_inner:cutoff_index_outer], label='First frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][len(data)//2][cutoff_index_inner:cutoff_index_outer],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][-1][cutoff_index_inner:cutoff_index_outer],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()

Substrate - substrate RDF

In [238]:
parameter_dict = {
    'nmolecules': 'metadata.system.surfactant.nmolecules'}
In [239]:
query = { 
    "metadata.project": project_id,
    "metadata.type": 'substrate_substrate_rdf',
    "metadata.step": "GromacsRelaxation:ProcessAnalyzeAndVisualize:push_filepad",
}

fp.filepad.count_documents(query)
Out[239]:
12
In [240]:
res_dict = {}
failed_list = []

match_aggregation = {
        "$match": query
    }
sort_aggregation = {
        "$sort": { 
            "metadata.datetime": pymongo.DESCENDING,
        }
    }
group_aggregation = { 
    "$group": { 
        "_id": { field: '${}'.format(key) for field, key in parameter_dict.items() },
        "degeneracy": {"$sum": 1}, # number matching data sets
        "latest":     {"$first": "$gfs_id"} # unique gridfs id of file
    }
}
second_sort_aggregation = {
    "$sort": { 
        "_id.nmolecules": pymongo.DESCENDING,
    }
}


aggregation_pipeline = [ 
    match_aggregation, sort_aggregation, group_aggregation, second_sort_aggregation ]
cursor = fp.filepad.aggregate(aggregation_pipeline)

# res_list = []
for i, c in enumerate(cursor): 
    content, metadata = fp.get_file_by_id(c["latest"])
    nmolecules = int(c["_id"]["nmolecules"])
    data_str = io.StringIO(content.decode())
    data = np.loadtxt(data_str, comments='#')
    d = data[0] # distance bins
    rdf = data[1:]
    res_dict[nmolecules] = {'dist': d, 'rdf': rdf}
    # res_list.append(data)
    print('.',end='')
print('')
............
In [241]:
n = len(res_dict)
cols = 2 if n > 1 else 1
rows = round(n/cols)
cutoff_index_inner = 1
cutoff_index_outer = -1
if rows > 1:
    positions = [(i,j) for i in range(rows) for j in range(cols)][:n]
else:
    positions = [i for i in range(cols)][:n]
    
fig, ax = plt.subplots(rows,cols,figsize=(5*cols,4*rows))
if not isinstance(ax, Iterable):
    ax = [ax]
# for key, grp in res_df.groupby(['nmolecules']):
for pos, (nmolecules, data) in zip(positions, res_dict.items()):
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][0][cutoff_index_inner:cutoff_index_outer], label='First frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][len(data)//2][cutoff_index_inner:cutoff_index_outer],label='Intermediate frame RDF')
    ax[pos].plot(data['dist'][cutoff_index_inner:cutoff_index_outer],data['rdf'][-1][cutoff_index_inner:cutoff_index_outer],label='Last frame RDF')
    ax[pos].set_title(nmolecules)
    ax[pos].legend()

fig.tight_layout()
# fig.legend()
fig.show()
In [ ]: